Machine Learning And Artificial Intelligence for Prediction of OPC Strength, RAC Strength, RAC Carbonization, RAC Chloride Erosion & RAC Sulfate Corrosion¶

Library Imports¶

In [ ]:
import sys,os
import sklearn
import matplotlib.pyplot as plt
from sklearn import metrics
import seaborn as sns
from sklearn import preprocessing
In [ ]:
import xgboost as xgb
import pandas as pd
import numpy as np
import warnings
import plotly.express as px
warnings.filterwarnings("ignore")

Datasheet Imports¶

In [ ]:
#detailed_rac = pd.read_csv("Materials 1645501 DataSheetold.csv")
carbonization_rac = pd.read_csv("Carbonization_Experimental_data.csv")
chloride_ion_erosion_rac = pd.read_csv("Chloride_Ion_Erosion_Experimental_data.csv")
sulfate_corrosion_rac = pd.read_csv("Sulfate_Corrosion_Experimental_data.csv")
cs = pd.read_csv("Concrete_Data.csv")
In [ ]:
detailed_rac = pd.read_csv("Materials 1771857 Datasheet.csv")
     Sulfate Corrosion Categorical To Continuous Conversion Done In Excel for Machine Learning

(Cation Type) Na Class Variable >> 1 Mg Class Variable >> 2

(Immersion Type) Full Immersion >> 1 Dry-Wet Cycle >> 2

Statistical Visualization¶

Descriptive Statistics to see the distribution of various parameters¶
In [ ]:
data = sulfate_corrosion_rac.describe()
plt.figure(figsize = (20,8))
sns.heatmap(data, center = 0.5, cmap = 'coolwarm', annot= True, xticklabels = data.columns, yticklabels = data.index,
            cbar= True, linewidths= 1,fmt=".2f")   # Da Heatmap
plt.show()
No description has been provided for this image
In [ ]:
data = chloride_ion_erosion_rac.describe()
plt.figure(figsize = (20,8))
sns.heatmap(data, center = 0.5, cmap = 'coolwarm', annot= True, xticklabels = data.columns, yticklabels = data.index,
            cbar= True, linewidths= 1,fmt=".2f")   # Da Heatmap
plt.show()
No description has been provided for this image
In [ ]:
data = carbonization_rac.describe()
plt.figure(figsize = (20,8))
sns.heatmap(data, center = 0.5, cmap = 'coolwarm', annot= True, xticklabels = data.columns, yticklabels = data.index,
            cbar= True, linewidths= 1,fmt=".2f")   # Da Heatmap
plt.show()
No description has been provided for this image
In [ ]:
data = detailed_rac.describe()
plt.figure(figsize = (20,8))
sns.heatmap(data, center = 0.5, cmap = 'coolwarm', annot= True, xticklabels = data.columns, yticklabels = data.index,
            cbar= True, linewidths= 1,fmt=".2f")   # Da Heatmap
plt.show()
No description has been provided for this image
In [ ]:
data = cs.describe()
plt.figure(figsize = (20,8))
sns.heatmap(data, center = 0.5, cmap = 'coolwarm', annot= True, xticklabels = data.columns, yticklabels = data.index,
            cbar= True, linewidths= 1,fmt=".2f")   # Da Heatmap
plt.show()
No description has been provided for this image
Printing the first few records to see the data has been imported correctly¶
In [ ]:
sulfate_corrosion_rac.head(5)
Out[ ]:
C W NCA RCA SF WA D CT SO4 IT WDT DDT DT Cycle CS MFS MCS SP K
0 350 176.0 791.0 0.0 140 1.3 2580.0 1 5.0 1 24 0 0 0 48.18 3.0 15.0 7.0 1.00
1 350 176.0 791.0 0.0 140 1.3 2580.0 1 5.0 1 24 0 0 90 48.18 3.0 15.0 7.0 0.93
2 350 176.0 791.0 0.0 140 1.3 2580.0 1 5.0 1 24 0 0 180 48.18 3.0 15.0 7.0 0.83
3 350 176.0 791.0 0.0 140 1.3 2580.0 1 5.0 1 24 0 0 360 48.18 3.0 15.0 7.0 0.76
4 350 176.0 0.0 791.0 140 2.5 2540.0 1 5.0 1 24 0 0 0 51.43 3.0 15.0 7.0 1.00
In [ ]:
chloride_ion_erosion_rac.head(5)
Out[ ]:
CA W S C NCA RCA WA D FA GGBS SF MFS MCS SP CS Q
0 28 225.0 642.0 410.0 1048.0 0.0 1.11 2620 0.0 0.0 0.0 5.0 20 0.0 35.09 6287.0
1 28 225.0 642.0 410.0 524.0 506.0 3.54 2520 0.0 0.0 0.0 5.0 20 0.0 30.69 6715.0
2 28 225.0 642.0 410.0 0.0 1017.0 5.96 2410 0.0 0.0 0.0 5.0 20 0.0 27.51 6910.0
3 28 225.0 611.0 307.5 1048.0 0.0 1.11 2620 102.5 0.0 0.0 5.0 20 0.0 31.48 4204.0
4 28 225.0 611.0 307.5 524.0 506.0 3.54 2520 102.5 0.0 0.0 5.0 20 0.0 30.11 4710.0
In [ ]:
carbonization_rac.head(5)
Out[ ]:
W C CO2% T RH ET WA R CA MFS MCS SP CS Depth
0 170.0 425.0 10.0 40 70 28 0.94 0.0 28 5.0 20.0 0.0 38.48 1.63
1 170.0 425.0 10.0 40 70 28 3.96 100.0 28 5.0 20.0 0.0 38.48 3.08
2 175.0 318.0 10.0 40 70 28 0.94 0.0 28 5.0 20.0 0.0 38.48 8.66
3 175.0 318.0 10.0 40 70 28 4.38 100.0 28 5.0 20.0 0.0 53.30 10.28
4 175.0 318.0 10.0 40 70 28 3.30 100.0 28 5.0 20.0 0.0 38.48 10.16
In [ ]:
detailed_rac
Out[ ]:
Ref Water kg/m^3 Cement kg/m^3 Sand NCA RCA SP Dia Max Density RCA W RCA UCS
0 [1] 165.0 370 650.0 850.5 364.5 2.22 20 2400 4.9 50.6
1 NaN 165.0 370 650.0 607.5 607.5 2.22 20 2400 4.9 50.8
2 NaN 165.0 370 650.0 0.0 1215.0 2.22 20 2400 4.9 50.2
3 NaN 165.0 460 575.0 850.5 364.5 2.22 20 2400 4.9 60.8
4 NaN 165.0 460 575.0 607.5 607.5 2.22 20 2400 4.9 61.2
... ... ... ... ... ... ... ... ... ... ... ...
312 NaN 190.4 280 873.0 0.0 962.0 0.00 22 2458 5.8 39.7
313 NaN 157.5 350 858.0 0.0 1016.0 3.50 22 2464 3.9 66.5
314 [33] 179.0 275 878.0 735.0 184.0 0.00 19 2320 5.3 49.3
315 NaN 179.0 275 849.0 455.0 455.0 0.00 19 2320 5.3 47.5
316 NaN 179.0 275 868.0 0.0 830.0 0.00 19 2320 5.3 53.7

317 rows × 11 columns

In [ ]:
detailed_rac.drop(columns=['Ref'],inplace=True)
In [ ]:
detailed_rac.head(5)
Out[ ]:
Water kg/m^3 Cement kg/m^3 Sand NCA RCA SP Dia Max Density RCA W RCA UCS
0 165.0 370 650.0 850.5 364.5 2.22 20 2400 4.9 50.6
1 165.0 370 650.0 607.5 607.5 2.22 20 2400 4.9 50.8
2 165.0 370 650.0 0.0 1215.0 2.22 20 2400 4.9 50.2
3 165.0 460 575.0 850.5 364.5 2.22 20 2400 4.9 60.8
4 165.0 460 575.0 607.5 607.5 2.22 20 2400 4.9 61.2
In [ ]:
cs.head(5)
Out[ ]:
C Slag FA Water SP Cagg Fagg Age CS (Mpa)
0 540.0 0.0 0.0 162.0 2.5 1040.0 676.0 28 79.99
1 540.0 0.0 0.0 162.0 2.5 1055.0 676.0 28 61.89
2 332.5 142.5 0.0 228.0 0.0 932.0 594.0 270 40.27
3 332.5 142.5 0.0 228.0 0.0 932.0 594.0 365 41.05
4 198.6 132.4 0.0 192.0 0.0 978.4 825.5 360 44.30

Data Visualisation¶

Pairplotting is used to see multi-collinearity and reduncancy of features if any. In our workflow we removed majority of the redundant features beforehand before importing the datasets.¶

In [ ]:
#sns.pairplot(cs,kind='reg',plot_kws={'line_kws':{'color':'red'}})
In [ ]:
#sns.pairplot(detailed_rac,kind='reg',plot_kws={'line_kws':{'color':'red'}})
In [ ]:
#sns.pairplot(carbonization_rac,kind='reg',plot_kws={'line_kws':{'color':'red'}})
In [ ]:
#sns.pairplot(chloride_ion_erosion_rac,kind='reg',plot_kws={'line_kws':{'color':'red'}})
In [ ]:
#sns.pairplot(sulfate_corrosion_rac,kind='reg',plot_kws={'line_kws':{'color':'red'}})
In [ ]:
import plotly_express as px

Pearson Correlation Matrix For Feature Selection and Feature Importance Visualisation¶

In [ ]:
corr = carbonization_rac.corr() # correlation matrix
corr.to_csv('./graphs_data/corr_c.csv', index=False)
lower_triangle = np.tril(corr, k = -1)  # select only the lower triangle of the correlation matrix
mask = lower_triangle == 0  # to mask the upper triangle in the following heatmap

plt.figure(figsize = (12,10))
sns.heatmap(lower_triangle, center = 0.5, cmap = 'viridis', annot= True, xticklabels = corr.index, yticklabels = corr.columns,
            cbar= True, linewidths= 1, mask = mask)   # Da Heatmap
plt.title('Correlation Matrix For Variables To Be Used For RAC Carbonization Depth Prediction',pad=10)
plt.show()
No description has been provided for this image
In [ ]:
corr = chloride_ion_erosion_rac.corr() # correlation matrix
corr.to_csv('./graphs_data/corr_cl.csv', index=False)
lower_triangle = np.tril(corr, k = -1)  # select only the lower triangle of the correlation matrix
mask = lower_triangle == 0  # to mask the upper triangle in the following heatmap

plt.figure(figsize = (16,10))
sns.heatmap(lower_triangle, center = 0.5, cmap = 'viridis', annot= True, xticklabels = corr.index, yticklabels = corr.columns,
            cbar= True, linewidths= 1, mask = mask)   # Da Heatmap
plt.title('Correlation Matrix For Variables To Be Used For RAC Chloride Ion Erosion Of Rft',pad=10)
plt.show()
No description has been provided for this image
In [ ]:
corr = sulfate_corrosion_rac.corr() # correlation matrix
corr.to_csv('./graphs_data/corr_sf.csv', index=False)
lower_triangle = np.tril(corr, k = -1)  # select only the lower triangle of the correlation matrix
mask = lower_triangle == 0  # to mask the upper triangle in the following heatmap

plt.figure(figsize = (16,10))
sns.heatmap(lower_triangle, center = 0.5, cmap = 'viridis', annot= True, xticklabels = corr.index, yticklabels = corr.columns,
            cbar= True, linewidths= 1, mask = mask)   # Da Heatmap
plt.title('Correlation Matrix For Variables To Be Used For RAC Sulfate Corrosion',pad=10)
plt.show()
No description has been provided for this image
In [ ]:
corr = detailed_rac.corr() # correlation matrix
corr.to_csv('./graphs_data/corr_det.csv', index=False)
lower_triangle = np.tril(corr, k = -1)  # select only the lower triangle of the correlation matrix
mask = lower_triangle == 0  # to mask the upper triangle in the following heatmap

plt.figure(figsize = (12,10))
sns.heatmap(lower_triangle, center = 0.5, cmap = 'viridis', annot= True, xticklabels = corr.index, yticklabels = corr.columns,
            cbar= True, linewidths= 1, mask = mask)   # Da Heatmap
plt.title('Correlation Matrix For Variables To Be Used For RAC Strength Prediction',pad=10)
plt.show()
No description has been provided for this image
In [ ]:
corr = abs(cs.corr()) # correlation matrix
corr.to_csv('./graphs_data/corr_cs.csv', index=False)
lower_triangle = np.tril(corr, k = -1)  # select only the lower triangle of the correlation matrix
mask = lower_triangle == 0  # to mask the upper triangle in the following heatmap

plt.figure(figsize = (12,10))
sns.heatmap(lower_triangle, center = 0.5, cmap = 'viridis', annot= True, xticklabels = corr.index, yticklabels = corr.columns,
            cbar= True, linewidths= 1, mask = mask)   # Da Heatmap
plt.title('Correlation Matrix For Variables To Be Used For NAC Strength Prediction',pad=10)
plt.show()
No description has been provided for this image

Data Wrangling And Preprocessing¶

In [ ]:
sulfate_corrosion_rac.shape
Out[ ]:
(206, 19)
In [ ]:
chloride_ion_erosion_rac.shape
Out[ ]:
(225, 16)
In [ ]:
carbonization_rac.shape
Out[ ]:
(452, 14)
In [ ]:
detailed_rac.shape
Out[ ]:
(317, 10)
In [ ]:
cs.shape
Out[ ]:
(1030, 9)
In [ ]:
carbonization_rac.drop_duplicates(inplace=True)
carbonization_rac.dropna(inplace=True)
carbonization_rac.shape
Out[ ]:
(451, 14)
In [ ]:
chloride_ion_erosion_rac.drop_duplicates(inplace=True)
chloride_ion_erosion_rac.dropna(inplace=True)
chloride_ion_erosion_rac.shape
Out[ ]:
(225, 16)
In [ ]:
sulfate_corrosion_rac.drop_duplicates(inplace=True)
sulfate_corrosion_rac.dropna(inplace=True)
sulfate_corrosion_rac.shape
Out[ ]:
(134, 19)
In [ ]:
#Data Preparation
detailed_rac.duplicated().sum()
Out[ ]:
54
In [ ]:
cs.duplicated().sum()
Out[ ]:
25
In [ ]:
cs.drop_duplicates(inplace=True)
cs.dropna(inplace=True)
cs.shape
Out[ ]:
(1005, 9)

Machine Learning Model Imports¶

In [ ]:
#Model Imports
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.ensemble import GradientBoostingRegressor
from xgboost import XGBRegressor
from xgboost import XGBRFRegressor

Data Splitting Into Feature & Target Variables¶

In [ ]:
#Data Splits Into Features & Targets
#X_detailed = detailed_rac.drop(columns=['Compressive Strength (Mpa)','Flexural Strength (Mpa)'])
X_detailed = detailed_rac.drop(columns=['UCS'])

#Y_detailed = detailed_rac[['Compressive Strength (Mpa)']]
Y_detailed = detailed_rac[['UCS']]

X_cs = cs.drop(columns=['CS (Mpa)'])
Y_cs = cs[['CS (Mpa)']]
X_carbonization = carbonization_rac.drop(columns =['Depth'])
Y_carbonization = carbonization_rac[['Depth']]
X_chloride = chloride_ion_erosion_rac.drop(columns =['Q'])
Y_chloride = chloride_ion_erosion_rac[['Q']]
X_sulfate = sulfate_corrosion_rac.drop(columns =['K'])
Y_sulfate = sulfate_corrosion_rac[['K']]

Data Conversion Into Arrays as Machine Learning Models Take Inputs Only In Form Of Arrays¶

In [ ]:
#Conversion To Arrays For ML Modelling
X_detailed = X_detailed.to_numpy()
Y_detailed = Y_detailed.to_numpy()
X_cs = X_cs.to_numpy()
Y_cs = Y_cs.to_numpy()
X_carbonization = X_carbonization.to_numpy()
Y_carbonization = Y_carbonization.to_numpy()
X_chloride = X_chloride.to_numpy()
Y_chloride = Y_chloride.to_numpy()
X_sulfate = X_sulfate.to_numpy()
Y_sulfate = Y_sulfate.to_numpy()
In [ ]:
#Converting Nested Arrays Into Simple List
Y_carbonization = [element for sublist in Y_carbonization for element in sublist]
Y_chloride = [element for sublist in Y_chloride for element in sublist]
In [ ]:
#Converting List back into an Array for ML Modelling
Y_carbonization = np.array(Y_carbonization)
Y_chloride = np.array(Y_chloride)

Data Standardization For Faster Convergence, Reducing Numerical Instabilities, Regularization & Improved Model Performance¶

In [ ]:
#Data Standardization To Remove Numerical Significance
transform_X_detailed = preprocessing.StandardScaler()
transform_X_cs = preprocessing.StandardScaler()
transform_X_carbonization = preprocessing.StandardScaler()
transform_X_chloride = preprocessing.StandardScaler()
transform_X_sulfate = preprocessing.StandardScaler()
#Separate StandardScaler objects are used so that each dataset is scaled according to its own mean and distribution
X_detailed = transform_X_detailed.fit_transform(X_detailed)
X_cs = transform_X_cs.fit_transform(cs)
X_carbonization = transform_X_carbonization.fit_transform(X_carbonization)
X_chloride = transform_X_chloride.fit_transform(X_chloride)
X_sulfate = transform_X_sulfate.fit_transform(X_sulfate)

Conversion Of Target Variable From 2D Array to 1D Array For Prediction Process¶

In [ ]:
Y_detailed = Y_detailed.ravel()
Y_carbonization.ravel()
Y_chloride.ravel()
Y_sulfate = Y_sulfate.ravel()
Y_cs = Y_cs.ravel()

Data Pipeline Creation¶

In [ ]:
Y_cs.shape
Out[ ]:
(1005,)
In [ ]:
X_cs.shape
Out[ ]:
(1005, 9)
In [ ]:
#Train-Test-Split
X_train_det, X_test_det, Y_train_det, Y_test_det = train_test_split(X_detailed,Y_detailed,test_size=0.1,random_state=22)
X_train_cs, X_test_cs, Y_train_cs, Y_test_cs = train_test_split(X_cs,Y_cs,test_size=0.1,random_state=22)
X_train_c, X_test_c, Y_train_c, Y_test_c = train_test_split(X_carbonization,Y_carbonization,test_size=0.1,random_state=22)
X_train_cl, X_test_cl, Y_train_cl, Y_test_cl = train_test_split(X_chloride,Y_chloride,test_size=0.1,random_state=22)
X_train_sf, X_test_sf, Y_train_sf, Y_test_sf = train_test_split(X_sulfate,Y_sulfate,test_size=0.1,random_state=22)
In [ ]:
X_test_cs.shape
Out[ ]:
(101, 9)

Defining Model Functions, Hypertuning Of Parameters To Prevent Overfitting & Cross-Validation For Realistic Accuracy Scoring. These Functions Will Create a Grid Of All Possible Parameters and Select The Best Possible Parameters For Prediction Purposes.¶

In [ ]:
#Support Vector Machine Function Definition
def svm_training(X_train,Y_train):
    parameters = {'kernel':['linear', 'rbf','poly','rbf', 'sigmoid'],
                  'gamma': [0.1,1,10],
                  'C' :[1,10,100]
                  }
    svm = SVR()
    svm_cv = GridSearchCV(svm,parameters,cv=5)
    svm_cv.fit(X=X_train,y=Y_train)
    print("tuned hyperparameters :(best parameters) ",svm_cv.best_params_)
    print("accuracy :",svm_cv.best_score_)
In [ ]:
#Decision Tree Regressor Function Definition
def decision_regressor(X_train,Y_train):
    parameters = {'max_depth': [3, 5, 7, 10],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 5],
    'max_features': [100, 500, 1000],
    'criterion': ['squared_error']}
    dtr = DecisionTreeRegressor()
    dtr_cv = GridSearchCV(dtr,parameters,cv=3)
    dtr_cv.fit(X=X_train,y=Y_train)
    print("tuned hyperparameters :(best parameters) ",dtr_cv.best_params_)
    print("accuracy :",dtr_cv.best_score_)
In [ ]:
'''
decision_regressor(X_train_det,Y_train_det)
decision_regressor(X_train_cs,Y_train_cs)
decision_regressor(X_train_c,Y_train_c)
decision_regressor(X_train_cl,Y_train_cl)
decision_regressor(X_train_sf,Y_train_sf)
'''
Out[ ]:
'\ndecision_regressor(X_train_det,Y_train_det)\ndecision_regressor(X_train_cs,Y_train_cs)\ndecision_regressor(X_train_c,Y_train_c)\ndecision_regressor(X_train_cl,Y_train_cl)\ndecision_regressor(X_train_sf,Y_train_sf)\n'
In [ ]:
#Random Forest Regressor Function Definition
def rf_regressor(X_train,Y_train):
    parameters = {'n_estimators': [100, 200,240, 300, 500],
    'max_depth': [3, 5, 7,9],
    'min_samples_split': [1,2, 5, 10],
    'min_samples_leaf': [1, 2, 5],
    'max_samples': [0.5,0.7,0.9],
    'bootstrap': [True, False],
    }
    rfr = RandomForestRegressor()
    rfr_cv = GridSearchCV(rfr,parameters,cv=3)
    rfr_cv.fit(X=X_train,y=Y_train)
    print("tuned hyperparameters :(best parameters) ",rfr_cv.best_params_)
    print("accuracy :",rfr_cv.best_score_)
In [ ]:
'''
rf_regressor(X_train_det,Y_train_det)
rf_regressor(X_train_cs,Y_train_cs)
rf_regressor(X_train_c,Y_train_c)
rf_regressor(X_train_cl,Y_train_cl)
rf_regressor(X_train_sf,Y_train_sf)
'''
Out[ ]:
'\nrf_regressor(X_train_det,Y_train_det)\nrf_regressor(X_train_cs,Y_train_cs)\nrf_regressor(X_train_c,Y_train_c)\nrf_regressor(X_train_cl,Y_train_cl)\nrf_regressor(X_train_sf,Y_train_sf)\n'
In [ ]:
#Adaboost Regressor Function Definition
def ada_regressor(X_train,Y_train):
    parameters = {'n_estimators': [50,100,500,1000],
    'learning_rate': [0.01,0.1,1,10],
    'loss': ['linear', 'square', 'exponential']}
    adaR = AdaBoostRegressor()
    adaR_cv = GridSearchCV(adaR,parameters,cv=3)
    adaR_cv.fit(X=X_train,y=Y_train)
    print("tuned hyperparameters :(best parameters) ",adaR_cv.best_params_)
    print("accuracy :",adaR_cv.best_score_)
In [ ]:
'''
ada_regressor(X_train_det,Y_train_det)
ada_regressor(X_train_cs,Y_train_cs)
ada_regressor(X_train_c,Y_train_c)
ada_regressor(X_train_cl,Y_train_cl)
ada_regressor(X_train_sf,Y_train_sf)
'''
Out[ ]:
'\nada_regressor(X_train_det,Y_train_det)\nada_regressor(X_train_cs,Y_train_cs)\nada_regressor(X_train_c,Y_train_c)\nada_regressor(X_train_cl,Y_train_cl)\nada_regressor(X_train_sf,Y_train_sf)\n'
In [ ]:
#Gradient Boosted Decision Trees Function Definition
def GBDT(X_train,Y_train):
    parameters = {'n_estimators': [50,100,150,200,231,500],
    'learning_rate': [0.05,0.075,0.1,0.15,0.2,1,10],
    'loss': ['squared_error'],
    'criterion':['friedman_mse'],
    'max_depth':[1,3,4,5],
    'subsample':[0.8,0.9],
    'min_samples_split' :[],
    }
    gbtree = GradientBoostingRegressor()
    gbtree_cv = GridSearchCV(gbtree,parameters,cv=3)
    gbtree_cv.fit(X=X_train,y=Y_train)
    print("tuned hyperparameters :(best parameters) ",gbtree_cv.best_params_)
    print("accuracy :",gbtree_cv.best_score_)
In [ ]:
'''
GBDT(X_train_det,Y_train_det)
GBDT(X_train_cs,Y_train_cs)
GBDT(X_train_c,Y_train_c)
GBDT(X_train_cl,Y_train_cl)
GBDT(X_train_sf,Y_train_sf)
'''
Out[ ]:
'\nGBDT(X_train_det,Y_train_det)\nGBDT(X_train_cs,Y_train_cs)\nGBDT(X_train_c,Y_train_c)\nGBDT(X_train_cl,Y_train_cl)\nGBDT(X_train_sf,Y_train_sf)\n'
In [ ]:
#Historical Gradient Boosting Regressor Function Definition
def histR(X_train,Y_train):
    parameters = {
    'max_iter': [50, 100, 200,500],
    'learning_rate': [0.01, 0.1,0.2, 1.0],
    'loss': ['absolute_error', 'squared_error'],
    'min_samples_leaf':[5,10,15,20],
    'max_depth':[1,3,4,5]}
    histR = HistGradientBoostingRegressor()
    histR_cv = GridSearchCV(histR,parameters,cv=3)
    histR_cv.fit(X=X_train,y=Y_train)
    print("tuned hyperparameters :(best parameters) ",histR_cv.best_params_)
    print("accuracy :",histR_cv.best_score_)
In [ ]:
'''
histR(X_train_det,Y_train_det)
histR(X_train_cs,Y_train_cs)
histR(X_train_c,Y_train_c)
histR(X_train_cl,Y_train_cl)
histR(X_train_sf,Y_train_sf)
'''
Out[ ]:
'\nhistR(X_train_det,Y_train_det)\nhistR(X_train_cs,Y_train_cs)\nhistR(X_train_c,Y_train_c)\nhistR(X_train_cl,Y_train_cl)\nhistR(X_train_sf,Y_train_sf)\n'

Bayes Optimization For Selected XGBoost Model For Predictions¶

Bayes Optimization For RAC Strength Prediction¶

In [ ]:
'''
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials, space_eval
from sklearn import metrics

# Create hyperparameter space to search over
space = {'max_depth': hp.choice('max_depth', np.arange(3, 15, 1, dtype = int)),
        'n_estimators': hp.choice('n_estimators', np.arange(50, 300, 10, dtype = int)),
        'colsample_bytree': hp.quniform('colsample_bytree', 0.5, 1.0, 0.1),
        'min_child_weight': hp.choice('min_child_weight', np.arange(0, 10, 1, dtype = int)),
        'subsample': hp.quniform('subsample', 0.5, 1.0, 0.1),
        'learning_rate': hp.quniform('learning_rate', 0.1, 0.3, 0.1),
         'gamma': hp.choice('gamma', np.arange(0, 20, 0.5, dtype = float)),
         'reg_alpha': hp.choice('reg_alpha', np.arange(0, 20, 0.5, dtype = float)),
         'reg_lambda': hp.choice('reg_lambda', np.arange(0, 20, 0.5, dtype = float)),
         
        'objective': 'reg:squarederror',
        
        'eval_metric': 'rmse'}

def score(params):
    model = XGBRegressor(**params)
    
    model.fit(X_train_det, Y_train_det, 
              eval_set = [(X_train_det, Y_train_det), (X_test_det, Y_test_det)],
              verbose = False, 
              early_stopping_rounds = 10)
    
    y_pred = model.predict(X_test_det)
    score = np.sqrt(metrics.mean_squared_error(Y_test_det, y_pred))
    print(score)
    return {'loss': score, 'status': STATUS_OK}    
    
def optimize(trials, space):
    
    best = fmin(score, space, algo = tpe.suggest, max_evals = 1000)
    return best

trials = Trials()
best_params = optimize(trials, space)
'''
Out[ ]:
"\nfrom hyperopt import fmin, tpe, hp, STATUS_OK, Trials, space_eval\nfrom sklearn import metrics\n\n# Create hyperparameter space to search over\nspace = {'max_depth': hp.choice('max_depth', np.arange(3, 15, 1, dtype = int)),\n        'n_estimators': hp.choice('n_estimators', np.arange(50, 300, 10, dtype = int)),\n        'colsample_bytree': hp.quniform('colsample_bytree', 0.5, 1.0, 0.1),\n        'min_child_weight': hp.choice('min_child_weight', np.arange(0, 10, 1, dtype = int)),\n        'subsample': hp.quniform('subsample', 0.5, 1.0, 0.1),\n        'learning_rate': hp.quniform('learning_rate', 0.1, 0.3, 0.1),\n         'gamma': hp.choice('gamma', np.arange(0, 20, 0.5, dtype = float)),\n         'reg_alpha': hp.choice('reg_alpha', np.arange(0, 20, 0.5, dtype = float)),\n         'reg_lambda': hp.choice('reg_lambda', np.arange(0, 20, 0.5, dtype = float)),\n         \n        'objective': 'reg:squarederror',\n        \n        'eval_metric': 'rmse'}\n\ndef score(params):\n    model = XGBRegressor(**params)\n    \n    model.fit(X_train_det, Y_train_det, \n              eval_set = [(X_train_det, Y_train_det), (X_test_det, Y_test_det)],\n              verbose = False, \n              early_stopping_rounds = 10)\n    \n    y_pred = model.predict(X_test_det)\n    score = np.sqrt(metrics.mean_squared_error(Y_test_det, y_pred))\n    print(score)\n    return {'loss': score, 'status': STATUS_OK}    \n    \ndef optimize(trials, space):\n    \n    best = fmin(score, space, algo = tpe.suggest, max_evals = 1000)\n    return best\n\ntrials = Trials()\nbest_params = optimize(trials, space)\n"
In [ ]:
#space_eval(space, best_params)
In [ ]:
model_opt_det = XGBRegressor(max_depth = 10, 
                         n_estimators = 260, 
                         learning_rate = 0.30000000000000004, 
                         min_child_weight = 1, 
                         subsample = 0.9,
                         colsample_bytree = 0.7000000000000001, 
                         gamma = 0.5, 
                         reg_alpha = 4.0, 
                         reg_lambda = 8.5, 
                         objective = 'reg:squarederror')

# Fit with hp datasets
model_opt_det.fit(X_train_det, Y_train_det, 
              eval_set = [(X_train_det, Y_train_det), (X_test_det, Y_test_det)], 
              eval_metric = 'rmse', 
              verbose = True, 
              early_stopping_rounds = 10)
[0]	validation_0-rmse:32.14178	validation_1-rmse:33.68727
[1]	validation_0-rmse:24.01826	validation_1-rmse:25.43059
[2]	validation_0-rmse:18.36169	validation_1-rmse:19.33178
[3]	validation_0-rmse:14.52273	validation_1-rmse:15.32806
[4]	validation_0-rmse:11.94513	validation_1-rmse:12.50188
[5]	validation_0-rmse:9.94591	validation_1-rmse:10.46617
[6]	validation_0-rmse:8.63880	validation_1-rmse:9.26681
[7]	validation_0-rmse:7.61058	validation_1-rmse:8.29531
[8]	validation_0-rmse:6.71324	validation_1-rmse:7.27969
[9]	validation_0-rmse:5.99349	validation_1-rmse:6.48184
[10]	validation_0-rmse:5.48237	validation_1-rmse:5.95102
[11]	validation_0-rmse:5.03353	validation_1-rmse:5.49901
[12]	validation_0-rmse:4.54168	validation_1-rmse:5.17769
[13]	validation_0-rmse:4.13829	validation_1-rmse:4.63691
[14]	validation_0-rmse:3.83670	validation_1-rmse:4.39988
[15]	validation_0-rmse:3.55178	validation_1-rmse:4.14578
[16]	validation_0-rmse:3.31508	validation_1-rmse:3.89523
[17]	validation_0-rmse:3.11495	validation_1-rmse:3.77367
[18]	validation_0-rmse:2.92512	validation_1-rmse:3.63301
[19]	validation_0-rmse:2.76099	validation_1-rmse:3.50699
[20]	validation_0-rmse:2.62965	validation_1-rmse:3.35057
[21]	validation_0-rmse:2.51734	validation_1-rmse:3.31364
[22]	validation_0-rmse:2.37469	validation_1-rmse:3.29932
[23]	validation_0-rmse:2.27571	validation_1-rmse:3.20065
[24]	validation_0-rmse:2.19513	validation_1-rmse:3.19163
[25]	validation_0-rmse:2.13777	validation_1-rmse:3.16702
[26]	validation_0-rmse:2.07465	validation_1-rmse:3.10568
[27]	validation_0-rmse:2.02414	validation_1-rmse:3.11172
[28]	validation_0-rmse:1.94760	validation_1-rmse:3.06507
[29]	validation_0-rmse:1.89781	validation_1-rmse:3.01364
[30]	validation_0-rmse:1.85678	validation_1-rmse:3.00868
[31]	validation_0-rmse:1.80844	validation_1-rmse:2.95649
[32]	validation_0-rmse:1.77042	validation_1-rmse:2.90616
[33]	validation_0-rmse:1.72461	validation_1-rmse:2.92644
[34]	validation_0-rmse:1.69729	validation_1-rmse:2.91048
[35]	validation_0-rmse:1.67569	validation_1-rmse:2.90133
[36]	validation_0-rmse:1.63856	validation_1-rmse:2.89890
[37]	validation_0-rmse:1.60463	validation_1-rmse:2.86288
[38]	validation_0-rmse:1.57233	validation_1-rmse:2.83164
[39]	validation_0-rmse:1.54644	validation_1-rmse:2.80330
[40]	validation_0-rmse:1.52616	validation_1-rmse:2.78058
[41]	validation_0-rmse:1.51215	validation_1-rmse:2.75858
[42]	validation_0-rmse:1.48841	validation_1-rmse:2.73100
[43]	validation_0-rmse:1.47244	validation_1-rmse:2.71497
[44]	validation_0-rmse:1.45498	validation_1-rmse:2.73169
[45]	validation_0-rmse:1.43814	validation_1-rmse:2.72090
[46]	validation_0-rmse:1.42980	validation_1-rmse:2.70995
[47]	validation_0-rmse:1.41194	validation_1-rmse:2.69451
[48]	validation_0-rmse:1.39942	validation_1-rmse:2.67249
[49]	validation_0-rmse:1.38923	validation_1-rmse:2.66047
[50]	validation_0-rmse:1.37862	validation_1-rmse:2.65821
[51]	validation_0-rmse:1.36559	validation_1-rmse:2.65054
[52]	validation_0-rmse:1.35581	validation_1-rmse:2.64945
[53]	validation_0-rmse:1.34595	validation_1-rmse:2.62984
[54]	validation_0-rmse:1.33044	validation_1-rmse:2.61977
[55]	validation_0-rmse:1.31878	validation_1-rmse:2.61609
[56]	validation_0-rmse:1.31316	validation_1-rmse:2.61511
[57]	validation_0-rmse:1.30255	validation_1-rmse:2.61339
[58]	validation_0-rmse:1.29778	validation_1-rmse:2.61461
[59]	validation_0-rmse:1.29519	validation_1-rmse:2.60850
[60]	validation_0-rmse:1.29153	validation_1-rmse:2.58943
[61]	validation_0-rmse:1.28892	validation_1-rmse:2.59263
[62]	validation_0-rmse:1.28197	validation_1-rmse:2.57570
[63]	validation_0-rmse:1.27363	validation_1-rmse:2.57240
[64]	validation_0-rmse:1.26603	validation_1-rmse:2.57216
[65]	validation_0-rmse:1.26028	validation_1-rmse:2.57722
[66]	validation_0-rmse:1.25382	validation_1-rmse:2.57452
[67]	validation_0-rmse:1.24639	validation_1-rmse:2.55913
[68]	validation_0-rmse:1.23809	validation_1-rmse:2.54742
[69]	validation_0-rmse:1.23646	validation_1-rmse:2.54384
[70]	validation_0-rmse:1.23289	validation_1-rmse:2.52633
[71]	validation_0-rmse:1.22706	validation_1-rmse:2.53446
[72]	validation_0-rmse:1.22391	validation_1-rmse:2.54140
[73]	validation_0-rmse:1.22172	validation_1-rmse:2.53964
[74]	validation_0-rmse:1.21911	validation_1-rmse:2.54354
[75]	validation_0-rmse:1.21548	validation_1-rmse:2.54058
[76]	validation_0-rmse:1.20630	validation_1-rmse:2.52382
[77]	validation_0-rmse:1.20242	validation_1-rmse:2.52131
[78]	validation_0-rmse:1.20029	validation_1-rmse:2.51830
[79]	validation_0-rmse:1.19966	validation_1-rmse:2.51019
[80]	validation_0-rmse:1.19697	validation_1-rmse:2.51877
[81]	validation_0-rmse:1.19487	validation_1-rmse:2.51695
[82]	validation_0-rmse:1.19437	validation_1-rmse:2.51468
[83]	validation_0-rmse:1.19304	validation_1-rmse:2.50115
[84]	validation_0-rmse:1.18672	validation_1-rmse:2.47472
[85]	validation_0-rmse:1.18517	validation_1-rmse:2.47523
[86]	validation_0-rmse:1.18069	validation_1-rmse:2.49064
[87]	validation_0-rmse:1.17772	validation_1-rmse:2.49056
[88]	validation_0-rmse:1.17443	validation_1-rmse:2.48882
[89]	validation_0-rmse:1.17319	validation_1-rmse:2.48529
[90]	validation_0-rmse:1.16866	validation_1-rmse:2.48427
[91]	validation_0-rmse:1.16526	validation_1-rmse:2.48056
[92]	validation_0-rmse:1.16526	validation_1-rmse:2.48056
[93]	validation_0-rmse:1.16446	validation_1-rmse:2.48226
Out[ ]:
XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.7000000000000001, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=0.5, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.30000000000000004,
             max_bin=None, max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=10, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=260, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=None, ...)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.7000000000000001, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=0.5, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.30000000000000004,
             max_bin=None, max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=10, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=260, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=None, ...)
In [ ]:
y_pred_1_opt = model_opt_det.predict(X_test_det)
In [ ]:
plt.style.use('dark_background')
y_line = np.arange(int(Y_test_det.min()) - 10, int(Y_test_det.max()) + 10)
plt.figure(figsize = (14, 10))
ax = plt.axes()
plt.plot(y_line, y_line, 'k--', lw = 1, label = 'Perfect Fit',color='w')
sns.scatterplot(x=y_pred_1_opt, y=Y_test_det, s = 100,hue=Y_test_det,palette='magma',markers=True)
plt.xlabel('Predicted Strength Values (MPa)', fontsize = 20, labelpad = 15)
plt.ylabel('True Strength Values (MPa)', fontsize = 20, labelpad = 15)
plt.title('XGB Optimized RAC Strength Predictions', fontsize = 22, c = 'w', pad = 20)
plt.legend(fontsize = 15)
plt.tick_params(labelsize = 15)
plt.show()
No description has been provided for this image
In [ ]:
residuals_1_opt = Y_test_det - y_pred_1_opt
In [ ]:
plt.figure(figsize = (12, 10))
ax = plt.axes()
sns.histplot(residuals_1_opt, bins = 5, alpha = 0.5,kde=True,binwidth=1,color = 'r')
plt.xlabel('Residual Values', fontsize = 20, labelpad = 15)
plt.ylabel('Count', fontsize = 20, labelpad = 15)
plt.title('Histogram of Error Values (RMSE)', fontsize = 22, c = 'w', pad = 20)
plt.tick_params(labelsize = 15)
plt.legend(fontsize = 12)
plt.style.use('bmh')
plt.show()
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No description has been provided for this image
In [ ]:
from sklearn.metrics import r2_score
yhat=model_opt_det.predict(X_train_det)
r_squared = r2_score(Y_train_det,yhat)
print(r_squared)
yhat=model_opt_det.predict(X_test_det)
r_squared = r2_score(Y_test_det,yhat)
print(r_squared)
0.9904442412981427
0.9586735753743867

Bayes Optimization For NAC Compressive Strength Prediction¶

In [ ]:
'''
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials, space_eval
from sklearn import metrics

# Create hyperparameter space to search over
space = {'max_depth': hp.choice('max_depth', np.arange(3, 15, 1, dtype = int)),
        'n_estimators': hp.choice('n_estimators', np.arange(50, 300, 10, dtype = int)),
        'colsample_bytree': hp.quniform('colsample_bytree', 0.5, 1.0, 0.1),
        'min_child_weight': hp.choice('min_child_weight', np.arange(0, 10, 1, dtype = int)),
        'subsample': hp.quniform('subsample', 0.5, 1.0, 0.1),
        'learning_rate': hp.quniform('learning_rate', 0.1, 0.3, 0.1),
         'gamma': hp.choice('gamma', np.arange(0, 20, 0.5, dtype = float)),
         'reg_alpha': hp.choice('reg_alpha', np.arange(0, 20, 0.5, dtype = float)),
         'reg_lambda': hp.choice('reg_lambda', np.arange(0, 20, 0.5, dtype = float)),
         
        'objective': 'reg:squarederror',
        
        'eval_metric': 'rmse'}

def score(params):
    model = XGBRegressor(**params)
    
    model.fit(X_train_cs, Y_train_cs, 
              eval_set = [(X_train_cs, Y_train_cs), (X_test_cs, Y_test_cs)],
              verbose = False, 
              early_stopping_rounds = 10)
    
    y_pred = model.predict(X_test_cs)
    score = np.sqrt(metrics.mean_squared_error(Y_test_cs, y_pred))
    print(score)
    return {'loss': score, 'status': STATUS_OK}    
    
def optimize(trials, space):
    
    best = fmin(score, space, algo = tpe.suggest, max_evals = 1000)
    return best

trials = Trials()
best_params = optimize(trials, space)
'''
Out[ ]:
"\nfrom hyperopt import fmin, tpe, hp, STATUS_OK, Trials, space_eval\nfrom sklearn import metrics\n\n# Create hyperparameter space to search over\nspace = {'max_depth': hp.choice('max_depth', np.arange(3, 15, 1, dtype = int)),\n        'n_estimators': hp.choice('n_estimators', np.arange(50, 300, 10, dtype = int)),\n        'colsample_bytree': hp.quniform('colsample_bytree', 0.5, 1.0, 0.1),\n        'min_child_weight': hp.choice('min_child_weight', np.arange(0, 10, 1, dtype = int)),\n        'subsample': hp.quniform('subsample', 0.5, 1.0, 0.1),\n        'learning_rate': hp.quniform('learning_rate', 0.1, 0.3, 0.1),\n         'gamma': hp.choice('gamma', np.arange(0, 20, 0.5, dtype = float)),\n         'reg_alpha': hp.choice('reg_alpha', np.arange(0, 20, 0.5, dtype = float)),\n         'reg_lambda': hp.choice('reg_lambda', np.arange(0, 20, 0.5, dtype = float)),\n         \n        'objective': 'reg:squarederror',\n        \n        'eval_metric': 'rmse'}\n\ndef score(params):\n    model = XGBRegressor(**params)\n    \n    model.fit(X_train_cs, Y_train_cs, \n              eval_set = [(X_train_cs, Y_train_cs), (X_test_cs, Y_test_cs)],\n              verbose = False, \n              early_stopping_rounds = 10)\n    \n    y_pred = model.predict(X_test_cs)\n    score = np.sqrt(metrics.mean_squared_error(Y_test_cs, y_pred))\n    print(score)\n    return {'loss': score, 'status': STATUS_OK}    \n    \ndef optimize(trials, space):\n    \n    best = fmin(score, space, algo = tpe.suggest, max_evals = 1000)\n    return best\n\ntrials = Trials()\nbest_params = optimize(trials, space)\n"
In [ ]:
#space_eval(space, best_params)
In [ ]:
model_opt_cs = XGBRegressor(max_depth = 11, 
                         n_estimators = 250, 
                         learning_rate = 0.1, 
                         min_child_weight = 0, 
                         subsample = 0.7000000000000001,
                         colsample_bytree = 1.0, 
                         gamma = 2.0, 
                         reg_alpha = 0.5, 
                         reg_lambda = 0.0, 
                         objective = 'reg:squarederror')

# Fit with hp datasets
model_opt_cs.fit(X_train_cs, Y_train_cs, 
              eval_set = [(X_train_cs, Y_train_cs), (X_test_cs, Y_test_cs)], 
              eval_metric = 'rmse', 
              verbose = True, 
              early_stopping_rounds = 10)
[0]	validation_0-rmse:34.27975	validation_1-rmse:36.76922
[1]	validation_0-rmse:30.85638	validation_1-rmse:33.09413
[2]	validation_0-rmse:27.77587	validation_1-rmse:29.79214
[3]	validation_0-rmse:25.00424	validation_1-rmse:26.81460
[4]	validation_0-rmse:22.50660	validation_1-rmse:24.14586
[5]	validation_0-rmse:20.26236	validation_1-rmse:21.75263
[6]	validation_0-rmse:18.23984	validation_1-rmse:19.60021
[7]	validation_0-rmse:16.42020	validation_1-rmse:17.64723
[8]	validation_0-rmse:14.78355	validation_1-rmse:15.88347
[9]	validation_0-rmse:13.31109	validation_1-rmse:14.32231
[10]	validation_0-rmse:11.98361	validation_1-rmse:12.90345
[11]	validation_0-rmse:10.78992	validation_1-rmse:11.62606
[12]	validation_0-rmse:9.71187	validation_1-rmse:10.47937
[13]	validation_0-rmse:8.74352	validation_1-rmse:9.44628
[14]	validation_0-rmse:7.87266	validation_1-rmse:8.51038
[15]	validation_0-rmse:7.08839	validation_1-rmse:7.67175
[16]	validation_0-rmse:6.38441	validation_1-rmse:6.92014
[17]	validation_0-rmse:5.74789	validation_1-rmse:6.23870
[18]	validation_0-rmse:5.17536	validation_1-rmse:5.62398
[19]	validation_0-rmse:4.65856	validation_1-rmse:5.07271
[20]	validation_0-rmse:4.19439	validation_1-rmse:4.57531
[21]	validation_0-rmse:3.77793	validation_1-rmse:4.13175
[22]	validation_0-rmse:3.40219	validation_1-rmse:3.72524
[23]	validation_0-rmse:3.06706	validation_1-rmse:3.36975
[24]	validation_0-rmse:2.76239	validation_1-rmse:3.04105
[25]	validation_0-rmse:2.48811	validation_1-rmse:2.74361
[26]	validation_0-rmse:2.24094	validation_1-rmse:2.48221
[27]	validation_0-rmse:2.01811	validation_1-rmse:2.24227
[28]	validation_0-rmse:1.81901	validation_1-rmse:2.03429
[29]	validation_0-rmse:1.63937	validation_1-rmse:1.84261
[30]	validation_0-rmse:1.47707	validation_1-rmse:1.66691
[31]	validation_0-rmse:1.33126	validation_1-rmse:1.51236
[32]	validation_0-rmse:1.19993	validation_1-rmse:1.37443
[33]	validation_0-rmse:1.08167	validation_1-rmse:1.25491
[34]	validation_0-rmse:0.97543	validation_1-rmse:1.14255
[35]	validation_0-rmse:0.88060	validation_1-rmse:1.04450
[36]	validation_0-rmse:0.79536	validation_1-rmse:0.95707
[37]	validation_0-rmse:0.71912	validation_1-rmse:0.87987
[38]	validation_0-rmse:0.65072	validation_1-rmse:0.81060
[39]	validation_0-rmse:0.58948	validation_1-rmse:0.74713
[40]	validation_0-rmse:0.53318	validation_1-rmse:0.69051
[41]	validation_0-rmse:0.48381	validation_1-rmse:0.64363
[42]	validation_0-rmse:0.43983	validation_1-rmse:0.60253
[43]	validation_0-rmse:0.39965	validation_1-rmse:0.55590
[44]	validation_0-rmse:0.36340	validation_1-rmse:0.51847
[45]	validation_0-rmse:0.33200	validation_1-rmse:0.48788
[46]	validation_0-rmse:0.30359	validation_1-rmse:0.46392
[47]	validation_0-rmse:0.27833	validation_1-rmse:0.44021
[48]	validation_0-rmse:0.25621	validation_1-rmse:0.42217
[49]	validation_0-rmse:0.23533	validation_1-rmse:0.40558
[50]	validation_0-rmse:0.21926	validation_1-rmse:0.39351
[51]	validation_0-rmse:0.20429	validation_1-rmse:0.38184
[52]	validation_0-rmse:0.19139	validation_1-rmse:0.37175
[53]	validation_0-rmse:0.17858	validation_1-rmse:0.35173
[54]	validation_0-rmse:0.16900	validation_1-rmse:0.34540
[55]	validation_0-rmse:0.15991	validation_1-rmse:0.33664
[56]	validation_0-rmse:0.15421	validation_1-rmse:0.33286
[57]	validation_0-rmse:0.14772	validation_1-rmse:0.32879
[58]	validation_0-rmse:0.14127	validation_1-rmse:0.32134
[59]	validation_0-rmse:0.13550	validation_1-rmse:0.31705
[60]	validation_0-rmse:0.13268	validation_1-rmse:0.31513
[61]	validation_0-rmse:0.12826	validation_1-rmse:0.30842
[62]	validation_0-rmse:0.12621	validation_1-rmse:0.30697
[63]	validation_0-rmse:0.12451	validation_1-rmse:0.30571
[64]	validation_0-rmse:0.12318	validation_1-rmse:0.30469
[65]	validation_0-rmse:0.12200	validation_1-rmse:0.30373
[66]	validation_0-rmse:0.12123	validation_1-rmse:0.30309
[67]	validation_0-rmse:0.12051	validation_1-rmse:0.30245
[68]	validation_0-rmse:0.11988	validation_1-rmse:0.30187
[69]	validation_0-rmse:0.11939	validation_1-rmse:0.30138
[70]	validation_0-rmse:0.11906	validation_1-rmse:0.30103
[71]	validation_0-rmse:0.11868	validation_1-rmse:0.30061
[72]	validation_0-rmse:0.11843	validation_1-rmse:0.30031
[73]	validation_0-rmse:0.11590	validation_1-rmse:0.29762
[74]	validation_0-rmse:0.11570	validation_1-rmse:0.29735
[75]	validation_0-rmse:0.11555	validation_1-rmse:0.29713
[76]	validation_0-rmse:0.11542	validation_1-rmse:0.29694
[77]	validation_0-rmse:0.11532	validation_1-rmse:0.29677
[78]	validation_0-rmse:0.11523	validation_1-rmse:0.29659
[79]	validation_0-rmse:0.11517	validation_1-rmse:0.29648
[80]	validation_0-rmse:0.11511	validation_1-rmse:0.29634
[81]	validation_0-rmse:0.11505	validation_1-rmse:0.29620
[82]	validation_0-rmse:0.11503	validation_1-rmse:0.29615
[83]	validation_0-rmse:0.11500	validation_1-rmse:0.29606
[84]	validation_0-rmse:0.11498	validation_1-rmse:0.29600
[85]	validation_0-rmse:0.11495	validation_1-rmse:0.29590
[86]	validation_0-rmse:0.11494	validation_1-rmse:0.29585
[87]	validation_0-rmse:0.11493	validation_1-rmse:0.29581
[88]	validation_0-rmse:0.11492	validation_1-rmse:0.29576
[89]	validation_0-rmse:0.11492	validation_1-rmse:0.29577
[90]	validation_0-rmse:0.11491	validation_1-rmse:0.29570
[91]	validation_0-rmse:0.11491	validation_1-rmse:0.29568
[92]	validation_0-rmse:0.11490	validation_1-rmse:0.29565
[93]	validation_0-rmse:0.11490	validation_1-rmse:0.29559
[94]	validation_0-rmse:0.11489	validation_1-rmse:0.29557
[95]	validation_0-rmse:0.11489	validation_1-rmse:0.29555
[96]	validation_0-rmse:0.11489	validation_1-rmse:0.29555
[97]	validation_0-rmse:0.11489	validation_1-rmse:0.29554
[98]	validation_0-rmse:0.11490	validation_1-rmse:0.29559
[99]	validation_0-rmse:0.11490	validation_1-rmse:0.29559
[100]	validation_0-rmse:0.11489	validation_1-rmse:0.29558
[101]	validation_0-rmse:0.11489	validation_1-rmse:0.29555
[102]	validation_0-rmse:0.11489	validation_1-rmse:0.29555
[103]	validation_0-rmse:0.11489	validation_1-rmse:0.29553
[104]	validation_0-rmse:0.11489	validation_1-rmse:0.29551
[105]	validation_0-rmse:0.11489	validation_1-rmse:0.29552
[106]	validation_0-rmse:0.11489	validation_1-rmse:0.29552
[107]	validation_0-rmse:0.11489	validation_1-rmse:0.29552
[108]	validation_0-rmse:0.11489	validation_1-rmse:0.29549
[109]	validation_0-rmse:0.11489	validation_1-rmse:0.29547
[110]	validation_0-rmse:0.11489	validation_1-rmse:0.29547
[111]	validation_0-rmse:0.11488	validation_1-rmse:0.29543
[112]	validation_0-rmse:0.11488	validation_1-rmse:0.29543
[113]	validation_0-rmse:0.11488	validation_1-rmse:0.29543
[114]	validation_0-rmse:0.11488	validation_1-rmse:0.29544
[115]	validation_0-rmse:0.11488	validation_1-rmse:0.29543
[116]	validation_0-rmse:0.11488	validation_1-rmse:0.29543
[117]	validation_0-rmse:0.11488	validation_1-rmse:0.29544
[118]	validation_0-rmse:0.11488	validation_1-rmse:0.29547
[119]	validation_0-rmse:0.11488	validation_1-rmse:0.29545
[120]	validation_0-rmse:0.11488	validation_1-rmse:0.29545
[121]	validation_0-rmse:0.11488	validation_1-rmse:0.29544
[122]	validation_0-rmse:0.11488	validation_1-rmse:0.29544
[123]	validation_0-rmse:0.11488	validation_1-rmse:0.29544
[124]	validation_0-rmse:0.11488	validation_1-rmse:0.29544
[125]	validation_0-rmse:0.11488	validation_1-rmse:0.29544
Out[ ]:
XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=1.0, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=2.0, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=11, max_leaves=None,
             min_child_weight=0, missing=nan, monotone_constraints=None,
             n_estimators=250, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=None, ...)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=1.0, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=2.0, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=11, max_leaves=None,
             min_child_weight=0, missing=nan, monotone_constraints=None,
             n_estimators=250, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=None, ...)
In [ ]:
y_pred_1_opt = model_opt_cs.predict(X_test_cs)
In [ ]:
y_line = np.arange(int(Y_test_cs.min()) - 10, int(Y_test_cs.max()) + 10)
plt.figure(figsize = (14, 10))
ax = plt.axes()
plt.style.use('dark_background')
plt.plot(y_line, y_line, 'k--', lw = 1, label = 'Perfect Fit',color='w')
sns.scatterplot(x=y_pred_1_opt, y=Y_test_cs, s = 100,hue=Y_test_cs,palette='magma',markers=True)
plt.xlabel('Predicted Strength Values (MPa)', fontsize = 20, labelpad = 15)
plt.ylabel('True Strength Values (MPa)', fontsize = 20, labelpad = 15)
plt.title('XGB Optimized NAC Strength Predictions', fontsize = 22, c = 'w', pad = 20)
plt.legend(fontsize = 15)
plt.tick_params(labelsize = 15)
plt.show()
No description has been provided for this image
In [ ]:
residuals_1_opt = Y_test_cs - y_pred_1_opt
residuals_1_opt
Out[ ]:
array([-2.20352173e-02, -3.10827332e-01,  1.35392761e-02,  2.36003113e-02,
       -5.03616333e-03,  1.55748367e-01,  8.57034302e-02,  5.08659363e-02,
        6.32261658e-02, -1.19328079e-01, -4.88452148e-02, -2.65269470e-02,
        1.99243927e-01,  2.61042938e+00, -1.00540314e-01,  4.57672119e-03,
        2.69793854e-01, -1.89980316e-02,  1.79663086e-01, -7.42427444e-02,
       -1.90060349e-01, -9.69296265e-02,  7.26425171e-02,  6.92439270e-02,
        1.51379395e-02, -4.58314514e-02,  6.92439270e-02,  8.67538452e-03,
        2.68666077e-02,  4.24588013e-02,  5.07374573e-02,  8.63055420e-02,
        6.50463867e-02, -4.71398163e-02, -4.64963150e-02,  2.05990601e-02,
        5.05493164e-02, -6.99682617e-02, -1.25219498e-01,  1.17144394e-01,
       -1.05784149e-01,  6.09234924e-01,  6.17457581e-01, -2.82121277e-02,
       -6.07819366e-02,  1.25201721e-01,  3.21763611e-02,  4.33070374e-01,
        1.25748367e-01, -1.67948990e-01, -1.80922852e-01, -8.43231964e-02,
       -1.21395416e-01, -1.50042877e-01, -1.40129089e-03, -1.21582108e-01,
        1.40011139e-01,  9.59335327e-03,  2.75543976e-02, -9.00428772e-02,
       -1.39778557e-01, -4.49899292e-02,  7.10691833e-02,  6.58149719e-02,
        2.17585754e-02,  1.36297607e-02,  1.05990601e-02, -2.20540314e-01,
       -2.30060349e-01, -7.69296265e-02, -6.52541351e-02,  2.74568176e-01,
        1.01600952e-01, -2.17330933e-03,  3.05493164e-02,  9.85987091e-02,
        1.05812225e-01,  3.05493164e-02, -2.42539673e-01, -6.41282654e-02,
       -9.25044250e-02,  1.26229858e-02, -7.99682617e-02,  4.17585754e-02,
       -1.98558807e-02,  1.72001801e-01, -1.63702393e-02, -9.30449677e-02,
       -1.52207947e-02,  4.21585083e-03,  3.14096069e-02,  1.25901794e-02,
        7.19241333e-02,  9.95712280e-03,  1.78787231e-03, -2.03003693e-01,
        7.11509705e-02,  6.94046021e-03, -6.94847488e-02, -7.06665039e-03,
       -3.61312866e-03])
In [ ]:
plt.figure(figsize = (12, 10))
ax = plt.axes()
sns.histplot(residuals_1_opt, bins = 15, alpha = 0.5,kde=True,color = 'r')
plt.xlabel('Residual Values (MPa)', fontsize = 20, labelpad = 15)
plt.ylabel('Count', fontsize = 20, labelpad = 15)
plt.title('Histogram of Error Values (RMSE)', fontsize = 22, c = 'w', pad = 20)
plt.tick_params(labelsize = 15)
plt.legend(fontsize = 12)
plt.style.use('bmh')
plt.show()
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No description has been provided for this image
In [ ]:
from sklearn.metrics import r2_score
yhat=model_opt_cs.predict(X_train_cs)
r_squared = r2_score(Y_train_cs,yhat)
print(r_squared)
yhat=model_opt_cs.predict(X_test_cs)
r_squared = r2_score(Y_test_cs,yhat)
print(r_squared)
0.9999493522877259
0.9997071870881837

Bayes Optimization For Carbonization Depth¶

In [ ]:
'''
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials, space_eval
from sklearn import metrics

# Create hyperparameter space to search over
space = {'max_depth': hp.choice('max_depth', np.arange(3, 15, 1, dtype = int)),
        'n_estimators': hp.choice('n_estimators', np.arange(50, 300, 10, dtype = int)),
        'colsample_bytree': hp.quniform('colsample_bytree', 0.5, 1.0, 0.1),
        'min_child_weight': hp.choice('min_child_weight', np.arange(0, 10, 1, dtype = int)),
        'subsample': hp.quniform('subsample', 0.5, 1.0, 0.1),
        'learning_rate': hp.quniform('learning_rate', 0.1, 0.3, 0.1),
         'gamma': hp.choice('gamma', np.arange(0, 20, 0.5, dtype = float)),
         'reg_alpha': hp.choice('reg_alpha', np.arange(0, 20, 0.5, dtype = float)),
         'reg_lambda': hp.choice('reg_lambda', np.arange(0, 20, 0.5, dtype = float)),
         
        'objective': 'reg:squarederror',
        
        'eval_metric': 'rmse'}

def score(params):
    model = XGBRegressor(**params)
    
    model.fit(X_train_c, Y_train_c, 
              eval_set = [(X_train_c, Y_train_c), (X_test_c, Y_test_c)],
              verbose = False, 
              early_stopping_rounds = 10)
    
    y_pred = model.predict(X_test_c)
    score = np.sqrt(metrics.mean_squared_error(Y_test_c, y_pred))
    print(score)
    return {'loss': score, 'status': STATUS_OK}    
    
def optimize(trials, space):
    
    best = fmin(score, space, algo = tpe.suggest, max_evals = 1000)
    return best

trials = Trials()
best_params = optimize(trials, space)
'''
Out[ ]:
"\nfrom hyperopt import fmin, tpe, hp, STATUS_OK, Trials, space_eval\nfrom sklearn import metrics\n\n# Create hyperparameter space to search over\nspace = {'max_depth': hp.choice('max_depth', np.arange(3, 15, 1, dtype = int)),\n        'n_estimators': hp.choice('n_estimators', np.arange(50, 300, 10, dtype = int)),\n        'colsample_bytree': hp.quniform('colsample_bytree', 0.5, 1.0, 0.1),\n        'min_child_weight': hp.choice('min_child_weight', np.arange(0, 10, 1, dtype = int)),\n        'subsample': hp.quniform('subsample', 0.5, 1.0, 0.1),\n        'learning_rate': hp.quniform('learning_rate', 0.1, 0.3, 0.1),\n         'gamma': hp.choice('gamma', np.arange(0, 20, 0.5, dtype = float)),\n         'reg_alpha': hp.choice('reg_alpha', np.arange(0, 20, 0.5, dtype = float)),\n         'reg_lambda': hp.choice('reg_lambda', np.arange(0, 20, 0.5, dtype = float)),\n         \n        'objective': 'reg:squarederror',\n        \n        'eval_metric': 'rmse'}\n\ndef score(params):\n    model = XGBRegressor(**params)\n    \n    model.fit(X_train_c, Y_train_c, \n              eval_set = [(X_train_c, Y_train_c), (X_test_c, Y_test_c)],\n              verbose = False, \n              early_stopping_rounds = 10)\n    \n    y_pred = model.predict(X_test_c)\n    score = np.sqrt(metrics.mean_squared_error(Y_test_c, y_pred))\n    print(score)\n    return {'loss': score, 'status': STATUS_OK}    \n    \ndef optimize(trials, space):\n    \n    best = fmin(score, space, algo = tpe.suggest, max_evals = 1000)\n    return best\n\ntrials = Trials()\nbest_params = optimize(trials, space)\n"
In [ ]:
#space_eval(space, best_params)
In [ ]:
model_opt_c = XGBRegressor(max_depth = 13, 
                         n_estimators = 250, 
                         learning_rate = 0.30000000000000004, 
                         min_child_weight = 9, 
                         subsample = 0.5,
                         colsample_bytree = 0.5, 
                         gamma = 0.0, 
                         reg_alpha = 1.0, 
                         reg_lambda = 10.5, 
                         objective = 'reg:squarederror')

# Fit with hp datasets
model_opt_c.fit(X_train_c, Y_train_c, 
              eval_set = [(X_train_c, Y_train_c), (X_test_c, Y_test_c)], 
              eval_metric = 'rmse', 
              verbose = True, 
              early_stopping_rounds = 10)
[0]	validation_0-rmse:11.19094	validation_1-rmse:10.21909
[1]	validation_0-rmse:9.53060	validation_1-rmse:8.45135
[2]	validation_0-rmse:8.26980	validation_1-rmse:7.12965
[3]	validation_0-rmse:7.13575	validation_1-rmse:6.01120
[4]	validation_0-rmse:6.45517	validation_1-rmse:5.23022
[5]	validation_0-rmse:5.87023	validation_1-rmse:4.83898
[6]	validation_0-rmse:5.55268	validation_1-rmse:4.45185
[7]	validation_0-rmse:5.28043	validation_1-rmse:4.15694
[8]	validation_0-rmse:4.77296	validation_1-rmse:4.02742
[9]	validation_0-rmse:4.67713	validation_1-rmse:3.99582
[10]	validation_0-rmse:4.53926	validation_1-rmse:3.78781
[11]	validation_0-rmse:4.45648	validation_1-rmse:3.74978
[12]	validation_0-rmse:4.42956	validation_1-rmse:3.69587
[13]	validation_0-rmse:4.32071	validation_1-rmse:3.45815
[14]	validation_0-rmse:3.92039	validation_1-rmse:3.40231
[15]	validation_0-rmse:3.88864	validation_1-rmse:3.41664
[16]	validation_0-rmse:3.61448	validation_1-rmse:3.42581
[17]	validation_0-rmse:3.50858	validation_1-rmse:3.24394
[18]	validation_0-rmse:3.33079	validation_1-rmse:3.19581
[19]	validation_0-rmse:3.28946	validation_1-rmse:3.12347
[20]	validation_0-rmse:3.08072	validation_1-rmse:3.05454
[21]	validation_0-rmse:3.02748	validation_1-rmse:2.96285
[22]	validation_0-rmse:2.99386	validation_1-rmse:2.89426
[23]	validation_0-rmse:2.95573	validation_1-rmse:2.83646
[24]	validation_0-rmse:2.86275	validation_1-rmse:2.79998
[25]	validation_0-rmse:2.75227	validation_1-rmse:2.79050
[26]	validation_0-rmse:2.62886	validation_1-rmse:2.76779
[27]	validation_0-rmse:2.61287	validation_1-rmse:2.73463
[28]	validation_0-rmse:2.60030	validation_1-rmse:2.70215
[29]	validation_0-rmse:2.51795	validation_1-rmse:2.63794
[30]	validation_0-rmse:2.48660	validation_1-rmse:2.55984
[31]	validation_0-rmse:2.46156	validation_1-rmse:2.51675
[32]	validation_0-rmse:2.43700	validation_1-rmse:2.46824
[33]	validation_0-rmse:2.37688	validation_1-rmse:2.43472
[34]	validation_0-rmse:2.36199	validation_1-rmse:2.41402
[35]	validation_0-rmse:2.28280	validation_1-rmse:2.44761
[36]	validation_0-rmse:2.22994	validation_1-rmse:2.36619
[37]	validation_0-rmse:2.18815	validation_1-rmse:2.38843
[38]	validation_0-rmse:2.16510	validation_1-rmse:2.37614
[39]	validation_0-rmse:2.07852	validation_1-rmse:2.31902
[40]	validation_0-rmse:2.07048	validation_1-rmse:2.29666
[41]	validation_0-rmse:2.05843	validation_1-rmse:2.31095
[42]	validation_0-rmse:2.03116	validation_1-rmse:2.33620
[43]	validation_0-rmse:2.02644	validation_1-rmse:2.34367
[44]	validation_0-rmse:1.99091	validation_1-rmse:2.27707
[45]	validation_0-rmse:1.95584	validation_1-rmse:2.27670
[46]	validation_0-rmse:1.89822	validation_1-rmse:2.28918
[47]	validation_0-rmse:1.88714	validation_1-rmse:2.29368
[48]	validation_0-rmse:1.84492	validation_1-rmse:2.27023
[49]	validation_0-rmse:1.83677	validation_1-rmse:2.27558
[50]	validation_0-rmse:1.83391	validation_1-rmse:2.29583
[51]	validation_0-rmse:1.79817	validation_1-rmse:2.26786
[52]	validation_0-rmse:1.77853	validation_1-rmse:2.26526
[53]	validation_0-rmse:1.76566	validation_1-rmse:2.25349
[54]	validation_0-rmse:1.75355	validation_1-rmse:2.29135
[55]	validation_0-rmse:1.74423	validation_1-rmse:2.29689
[56]	validation_0-rmse:1.72697	validation_1-rmse:2.26273
[57]	validation_0-rmse:1.70921	validation_1-rmse:2.22424
[58]	validation_0-rmse:1.69903	validation_1-rmse:2.21254
[59]	validation_0-rmse:1.69128	validation_1-rmse:2.19272
[60]	validation_0-rmse:1.68968	validation_1-rmse:2.19419
[61]	validation_0-rmse:1.67313	validation_1-rmse:2.16944
[62]	validation_0-rmse:1.66165	validation_1-rmse:2.15539
[63]	validation_0-rmse:1.65865	validation_1-rmse:2.13913
[64]	validation_0-rmse:1.64576	validation_1-rmse:2.11399
[65]	validation_0-rmse:1.64442	validation_1-rmse:2.11545
[66]	validation_0-rmse:1.64099	validation_1-rmse:2.13727
[67]	validation_0-rmse:1.63626	validation_1-rmse:2.10876
[68]	validation_0-rmse:1.61440	validation_1-rmse:2.08529
[69]	validation_0-rmse:1.60495	validation_1-rmse:2.07331
[70]	validation_0-rmse:1.58476	validation_1-rmse:2.04268
[71]	validation_0-rmse:1.56710	validation_1-rmse:2.04145
[72]	validation_0-rmse:1.55546	validation_1-rmse:2.05229
[73]	validation_0-rmse:1.55084	validation_1-rmse:2.03709
[74]	validation_0-rmse:1.54614	validation_1-rmse:2.04807
[75]	validation_0-rmse:1.53562	validation_1-rmse:2.07461
[76]	validation_0-rmse:1.52005	validation_1-rmse:2.07641
[77]	validation_0-rmse:1.51111	validation_1-rmse:2.05393
[78]	validation_0-rmse:1.50491	validation_1-rmse:2.06956
[79]	validation_0-rmse:1.49506	validation_1-rmse:2.05850
[80]	validation_0-rmse:1.48495	validation_1-rmse:2.02665
[81]	validation_0-rmse:1.47988	validation_1-rmse:2.01887
[82]	validation_0-rmse:1.47687	validation_1-rmse:2.00735
[83]	validation_0-rmse:1.47323	validation_1-rmse:2.01186
[84]	validation_0-rmse:1.46686	validation_1-rmse:2.00596
[85]	validation_0-rmse:1.46377	validation_1-rmse:2.01324
[86]	validation_0-rmse:1.46282	validation_1-rmse:2.03997
[87]	validation_0-rmse:1.46038	validation_1-rmse:2.03667
[88]	validation_0-rmse:1.45529	validation_1-rmse:2.03049
[89]	validation_0-rmse:1.44203	validation_1-rmse:2.00438
[90]	validation_0-rmse:1.43224	validation_1-rmse:1.98135
[91]	validation_0-rmse:1.42953	validation_1-rmse:2.00441
[92]	validation_0-rmse:1.42214	validation_1-rmse:2.01116
[93]	validation_0-rmse:1.41891	validation_1-rmse:2.00497
[94]	validation_0-rmse:1.41717	validation_1-rmse:2.01406
[95]	validation_0-rmse:1.41445	validation_1-rmse:1.99841
[96]	validation_0-rmse:1.41057	validation_1-rmse:1.98324
[97]	validation_0-rmse:1.39425	validation_1-rmse:1.95729
[98]	validation_0-rmse:1.38785	validation_1-rmse:1.94350
[99]	validation_0-rmse:1.38842	validation_1-rmse:1.94274
[100]	validation_0-rmse:1.37894	validation_1-rmse:1.92117
[101]	validation_0-rmse:1.37560	validation_1-rmse:1.90599
[102]	validation_0-rmse:1.37231	validation_1-rmse:1.90777
[103]	validation_0-rmse:1.36906	validation_1-rmse:1.91962
[104]	validation_0-rmse:1.36466	validation_1-rmse:1.90872
[105]	validation_0-rmse:1.36202	validation_1-rmse:1.90623
[106]	validation_0-rmse:1.35876	validation_1-rmse:1.87981
[107]	validation_0-rmse:1.35290	validation_1-rmse:1.87243
[108]	validation_0-rmse:1.35255	validation_1-rmse:1.85514
[109]	validation_0-rmse:1.33722	validation_1-rmse:1.85829
[110]	validation_0-rmse:1.33031	validation_1-rmse:1.83008
[111]	validation_0-rmse:1.32799	validation_1-rmse:1.82000
[112]	validation_0-rmse:1.32281	validation_1-rmse:1.80747
[113]	validation_0-rmse:1.31622	validation_1-rmse:1.80035
[114]	validation_0-rmse:1.31266	validation_1-rmse:1.79771
[115]	validation_0-rmse:1.31041	validation_1-rmse:1.79802
[116]	validation_0-rmse:1.30981	validation_1-rmse:1.79413
[117]	validation_0-rmse:1.30755	validation_1-rmse:1.81911
[118]	validation_0-rmse:1.30584	validation_1-rmse:1.81760
[119]	validation_0-rmse:1.30434	validation_1-rmse:1.78371
[120]	validation_0-rmse:1.30209	validation_1-rmse:1.79937
[121]	validation_0-rmse:1.30084	validation_1-rmse:1.80381
[122]	validation_0-rmse:1.29755	validation_1-rmse:1.78571
[123]	validation_0-rmse:1.29690	validation_1-rmse:1.79369
[124]	validation_0-rmse:1.28843	validation_1-rmse:1.80070
[125]	validation_0-rmse:1.28414	validation_1-rmse:1.81011
[126]	validation_0-rmse:1.28441	validation_1-rmse:1.80808
[127]	validation_0-rmse:1.28297	validation_1-rmse:1.83498
[128]	validation_0-rmse:1.28246	validation_1-rmse:1.83989
Out[ ]:
XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.5, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=0.0, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.30000000000000004,
             max_bin=None, max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=13, max_leaves=None,
             min_child_weight=9, missing=nan, monotone_constraints=None,
             n_estimators=250, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=None, ...)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.5, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=0.0, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.30000000000000004,
             max_bin=None, max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=13, max_leaves=None,
             min_child_weight=9, missing=nan, monotone_constraints=None,
             n_estimators=250, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=None, ...)
In [ ]:
y_pred_1_opt = model_opt_c.predict(X_test_c)
In [ ]:
y_line = np.arange(int(Y_test_c.min()), int(Y_test_c.max()))
plt.figure(figsize = (14, 10))
ax = plt.axes()
plt.style.use('dark_background')
plt.plot(y_line, y_line, 'k--', lw = 1, label = 'Perfect Fit',color='w')
sns.scatterplot(x=y_pred_1_opt, y=Y_test_c, s = 100,hue=Y_test_c,palette='magma',markers=True)
plt.xlabel('Predicted Depth Values (mm)', fontsize = 20, labelpad = 15)
plt.ylabel('True Depth Values (mm)', fontsize = 20, labelpad = 15)
plt.title('XGB Optimized RAC Carbonization Depth', fontsize = 22, c = 'w', pad = 20)
plt.legend(fontsize = 15)
plt.tick_params(labelsize = 15)
plt.show()
No description has been provided for this image
In [ ]:
residuals_1_opt = Y_test_c - y_pred_1_opt
residuals_1_opt
Out[ ]:
array([ 0.08583717,  0.81206036,  2.96483343, -1.01656654,  2.74546642,
        1.45466785, -1.17684078,  0.91792057,  5.72451012,  0.80893982,
       -2.87309227,  0.1045742 ,  0.09766891, -2.17442556,  2.68523598,
        2.51822929, -0.57663689, -0.36102467,  2.61611977, -0.53945146,
       -1.89656162, -0.72413769,  0.7779084 ,  2.20488937,  1.58403946,
       -2.14355707, -0.03260311,  0.00927668,  1.94363266, -4.01559677,
       -0.02469463,  0.94820145,  1.11903313,  0.08134613,  1.01452402,
        0.91821629,  1.13538715, -1.99783337,  0.65163467, -1.02837749,
        0.99772148, -0.57172472, -1.10571278,  1.06970055, -2.26024689,
        0.46615635])
In [ ]:
plt.figure(figsize = (12, 10))
ax = plt.axes()
sns.histplot(residuals_1_opt, bins = 5, alpha = 0.5,kde=True,binwidth=1,color = 'r')
plt.xlabel('Residual Values (mm)', fontsize = 20, labelpad = 15)
plt.ylabel('Count', fontsize = 20, labelpad = 15)
plt.title('Histogram of Error Values (RMSE)', fontsize = 22, c = 'w', pad = 20)
plt.tick_params(labelsize = 15)
plt.legend(fontsize = 12)
plt.style.use('bmh')
plt.show()
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No description has been provided for this image
In [ ]:
from sklearn.metrics import r2_score
yhat=model_opt_c.predict(X_train_c)
r_squared = r2_score(Y_train_c,yhat)
print(r_squared)
yhat=model_opt_c.predict(X_test_c)
r_squared = r2_score(Y_test_c,yhat)
print(r_squared)
0.9732719326896205
0.9316704193603992

Bayes Optimization for Sulfate Corrosion¶

In [ ]:
'''
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials, space_eval
from sklearn import metrics

# Create hyperparameter space to search over
space = {'max_depth': hp.choice('max_depth', np.arange(3, 15, 1, dtype = int)),
        'n_estimators': hp.choice('n_estimators', np.arange(50, 300, 10, dtype = int)),
        'colsample_bytree': hp.quniform('colsample_bytree', 0.5, 1.0, 0.1),
        'min_child_weight': hp.choice('min_child_weight', np.arange(0, 10, 1, dtype = int)),
        'subsample': hp.quniform('subsample', 0.5, 1.0, 0.1),
        'learning_rate': hp.quniform('learning_rate', 0.1, 0.3, 0.1),
         'gamma': hp.choice('gamma', np.arange(0, 20, 0.5, dtype = float)),
         'reg_alpha': hp.choice('reg_alpha', np.arange(0, 20, 0.5, dtype = float)),
         'reg_lambda': hp.choice('reg_lambda', np.arange(0, 20, 0.5, dtype = float)),
         
        'objective': 'reg:squarederror',
        
        'eval_metric': 'rmse'}

def score(params):
    model = XGBRegressor(**params)
    
    model.fit(X_train_sf, Y_train_sf, 
              eval_set = [(X_train_sf, Y_train_sf), (X_test_sf, Y_test_sf)],
              verbose = False, 
              early_stopping_rounds = 10)
    
    y_pred = model.predict(X_test_sf)
    score = np.sqrt(metrics.mean_squared_error(Y_test_sf, y_pred))
    print(score)
    return {'loss': score, 'status': STATUS_OK}    
    
def optimize(trials, space):
    
    best = fmin(score, space, algo = tpe.suggest, max_evals = 1000)
    return best

trials = Trials()
best_params = optimize(trials, space)
'''
Out[ ]:
"\nfrom hyperopt import fmin, tpe, hp, STATUS_OK, Trials, space_eval\nfrom sklearn import metrics\n\n# Create hyperparameter space to search over\nspace = {'max_depth': hp.choice('max_depth', np.arange(3, 15, 1, dtype = int)),\n        'n_estimators': hp.choice('n_estimators', np.arange(50, 300, 10, dtype = int)),\n        'colsample_bytree': hp.quniform('colsample_bytree', 0.5, 1.0, 0.1),\n        'min_child_weight': hp.choice('min_child_weight', np.arange(0, 10, 1, dtype = int)),\n        'subsample': hp.quniform('subsample', 0.5, 1.0, 0.1),\n        'learning_rate': hp.quniform('learning_rate', 0.1, 0.3, 0.1),\n         'gamma': hp.choice('gamma', np.arange(0, 20, 0.5, dtype = float)),\n         'reg_alpha': hp.choice('reg_alpha', np.arange(0, 20, 0.5, dtype = float)),\n         'reg_lambda': hp.choice('reg_lambda', np.arange(0, 20, 0.5, dtype = float)),\n         \n        'objective': 'reg:squarederror',\n        \n        'eval_metric': 'rmse'}\n\ndef score(params):\n    model = XGBRegressor(**params)\n    \n    model.fit(X_train_sf, Y_train_sf, \n              eval_set = [(X_train_sf, Y_train_sf), (X_test_sf, Y_test_sf)],\n              verbose = False, \n              early_stopping_rounds = 10)\n    \n    y_pred = model.predict(X_test_sf)\n    score = np.sqrt(metrics.mean_squared_error(Y_test_sf, y_pred))\n    print(score)\n    return {'loss': score, 'status': STATUS_OK}    \n    \ndef optimize(trials, space):\n    \n    best = fmin(score, space, algo = tpe.suggest, max_evals = 1000)\n    return best\n\ntrials = Trials()\nbest_params = optimize(trials, space)\n"
In [ ]:
#space_eval(space, best_params)
In [ ]:
model_opt_sf = XGBRegressor(max_depth = 12, 
                         n_estimators = 250, 
                         learning_rate = 0.30000000000000004, 
                         min_child_weight = 4, 
                         subsample = 0.8,
                         colsample_bytree = 0.7000000000000001, 
                         gamma = 0.0, 
                         reg_alpha = 0.0, 
                         reg_lambda = 0.5, 
                         objective = 'reg:squarederror')

# Fit with hp datasets
model_opt_sf.fit(X_train_sf, Y_train_sf, 
              eval_set = [(X_train_sf, Y_train_sf), (X_test_sf, Y_test_sf)], 
              eval_metric = 'rmse', 
              verbose = True, 
              early_stopping_rounds = 10)
[0]	validation_0-rmse:0.31080	validation_1-rmse:0.30846
[1]	validation_0-rmse:0.22714	validation_1-rmse:0.23632
[2]	validation_0-rmse:0.16823	validation_1-rmse:0.18801
[3]	validation_0-rmse:0.14011	validation_1-rmse:0.15601
[4]	validation_0-rmse:0.10672	validation_1-rmse:0.13817
[5]	validation_0-rmse:0.08281	validation_1-rmse:0.12586
[6]	validation_0-rmse:0.06543	validation_1-rmse:0.11761
[7]	validation_0-rmse:0.05486	validation_1-rmse:0.11453
[8]	validation_0-rmse:0.04587	validation_1-rmse:0.11038
[9]	validation_0-rmse:0.04076	validation_1-rmse:0.10635
[10]	validation_0-rmse:0.03844	validation_1-rmse:0.10553
[11]	validation_0-rmse:0.03550	validation_1-rmse:0.10324
[12]	validation_0-rmse:0.03413	validation_1-rmse:0.10238
[13]	validation_0-rmse:0.03321	validation_1-rmse:0.09993
[14]	validation_0-rmse:0.03042	validation_1-rmse:0.09838
[15]	validation_0-rmse:0.02836	validation_1-rmse:0.09562
[16]	validation_0-rmse:0.02775	validation_1-rmse:0.09460
[17]	validation_0-rmse:0.02722	validation_1-rmse:0.09252
[18]	validation_0-rmse:0.02682	validation_1-rmse:0.09135
[19]	validation_0-rmse:0.02616	validation_1-rmse:0.09179
[20]	validation_0-rmse:0.02577	validation_1-rmse:0.09082
[21]	validation_0-rmse:0.02525	validation_1-rmse:0.09011
[22]	validation_0-rmse:0.02421	validation_1-rmse:0.08948
[23]	validation_0-rmse:0.02325	validation_1-rmse:0.08918
[24]	validation_0-rmse:0.02264	validation_1-rmse:0.08902
[25]	validation_0-rmse:0.02205	validation_1-rmse:0.08744
[26]	validation_0-rmse:0.02121	validation_1-rmse:0.08751
[27]	validation_0-rmse:0.02123	validation_1-rmse:0.08744
[28]	validation_0-rmse:0.02026	validation_1-rmse:0.08730
[29]	validation_0-rmse:0.01977	validation_1-rmse:0.08776
[30]	validation_0-rmse:0.01883	validation_1-rmse:0.08932
[31]	validation_0-rmse:0.01831	validation_1-rmse:0.08795
[32]	validation_0-rmse:0.01746	validation_1-rmse:0.08829
[33]	validation_0-rmse:0.01728	validation_1-rmse:0.08700
[34]	validation_0-rmse:0.01708	validation_1-rmse:0.08617
[35]	validation_0-rmse:0.01672	validation_1-rmse:0.08638
[36]	validation_0-rmse:0.01656	validation_1-rmse:0.08678
[37]	validation_0-rmse:0.01625	validation_1-rmse:0.08690
[38]	validation_0-rmse:0.01580	validation_1-rmse:0.08759
[39]	validation_0-rmse:0.01535	validation_1-rmse:0.08842
[40]	validation_0-rmse:0.01512	validation_1-rmse:0.08862
[41]	validation_0-rmse:0.01492	validation_1-rmse:0.08837
[42]	validation_0-rmse:0.01478	validation_1-rmse:0.08779
[43]	validation_0-rmse:0.01473	validation_1-rmse:0.08841
[44]	validation_0-rmse:0.01425	validation_1-rmse:0.08786
Out[ ]:
XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.7000000000000001, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=0.0, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.30000000000000004,
             max_bin=None, max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=12, max_leaves=None,
             min_child_weight=4, missing=nan, monotone_constraints=None,
             n_estimators=250, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=None, ...)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.7000000000000001, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=0.0, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.30000000000000004,
             max_bin=None, max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=12, max_leaves=None,
             min_child_weight=4, missing=nan, monotone_constraints=None,
             n_estimators=250, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=None, ...)
In [ ]:
y_pred_1_opt = model_opt_sf.predict(X_test_sf)
In [ ]:
y_line = np.arange(int(Y_test_sf.min()), int(Y_test_sf.max())+1.5 )
plt.figure(figsize = (14, 10))
ax = plt.axes()
plt.style.use('dark_background')
plt.plot(y_line, y_line, 'k--', lw = 1, label = 'Perfect Fit',color='w')
sns.scatterplot(x=y_pred_1_opt, y=Y_test_sf, s = 100,hue=Y_test_sf,palette='magma',markers=True)
plt.xlabel('Predicted Factor (K)', fontsize = 20, labelpad = 15)
plt.ylabel('True Factor (K)', fontsize = 20, labelpad = 15)
plt.title('XGB Optimized RAC Sulfate Corrosion Factor', fontsize = 22, c = 'w', pad = 20)
plt.legend(fontsize = 15)
plt.tick_params(labelsize = 15)
plt.show()
No description has been provided for this image
In [ ]:
residuals_1_opt = Y_test_sf - y_pred_1_opt
residuals_1_opt
Out[ ]:
array([ 0.00477047,  0.19026587,  0.07372953, -0.00294804, -0.0154757 ,
       -0.18574862,  0.01844478,  0.00266603,  0.01622095,  0.07313391,
       -0.01233398, -0.09885678, -0.10795001, -0.00291286])
In [ ]:
plt.figure(figsize = (12, 10))
ax = plt.axes()
sns.histplot(residuals_1_opt, bins = 15, alpha = 0.5,kde=True,color = 'r')
plt.xlabel('Residual Values (K)', fontsize = 20, labelpad = 15)
plt.ylabel('Count', fontsize = 20, labelpad = 15)
plt.title('Histogram of Error Values (RMSE)', fontsize = 22, c = 'w', pad = 20)
plt.tick_params(labelsize = 15)
plt.legend(fontsize = 12)
plt.style.use('bmh')
plt.show()
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No description has been provided for this image
In [ ]:
from sklearn.metrics import r2_score
yhat=model_opt_sf.predict(X_train_sf)
r_squared = r2_score(Y_train_sf,yhat)
print(r_squared)
yhat=model_opt_sf.predict(X_test_sf)
r_squared = r2_score(Y_test_sf,yhat)
print(r_squared)
0.9947415003055488
0.8775381393119359

Bayes Search Optimization for Chloride Ion Erosion¶

In [ ]:
'''
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials, space_eval
from sklearn import metrics

# Create hyperparameter space to search over
space = {'max_depth': hp.choice('max_depth', np.arange(3, 15, 1, dtype = int)),
        'n_estimators': hp.choice('n_estimators', np.arange(50, 300, 10, dtype = int)),
        'colsample_bytree': hp.quniform('colsample_bytree', 0.5, 1.0, 0.1),
        'min_child_weight': hp.choice('min_child_weight', np.arange(0, 10, 1, dtype = int)),
        'subsample': hp.quniform('subsample', 0.5, 1.0, 0.1),
        'learning_rate': hp.quniform('learning_rate', 0.1, 0.3, 0.1),
         'gamma': hp.choice('gamma', np.arange(0, 20, 0.5, dtype = float)),
         'reg_alpha': hp.choice('reg_alpha', np.arange(0, 20, 0.5, dtype = float)),
         'reg_lambda': hp.choice('reg_lambda', np.arange(0, 20, 0.5, dtype = float)),
         
        'objective': 'reg:squarederror',
        
        'eval_metric': 'rmse'}

def score(params):
    model = XGBRegressor(**params)
    
    model.fit(X_train_cl, Y_train_cl, 
              eval_set = [(X_train_cl, Y_train_cl), (X_test_cl, Y_test_cl)],
              verbose = False, 
              early_stopping_rounds = 10)
    
    y_pred = model.predict(X_test_cl)
    score = np.sqrt(metrics.mean_squared_error(Y_test_cl, y_pred))
    print(score)
    return {'loss': score, 'status': STATUS_OK}    
    
def optimize(trials, space):
    
    best = fmin(score, space, algo = tpe.suggest, max_evals = 1000)
    return best

trials = Trials()
best_params = optimize(trials, space)
'''
Out[ ]:
"\nfrom hyperopt import fmin, tpe, hp, STATUS_OK, Trials, space_eval\nfrom sklearn import metrics\n\n# Create hyperparameter space to search over\nspace = {'max_depth': hp.choice('max_depth', np.arange(3, 15, 1, dtype = int)),\n        'n_estimators': hp.choice('n_estimators', np.arange(50, 300, 10, dtype = int)),\n        'colsample_bytree': hp.quniform('colsample_bytree', 0.5, 1.0, 0.1),\n        'min_child_weight': hp.choice('min_child_weight', np.arange(0, 10, 1, dtype = int)),\n        'subsample': hp.quniform('subsample', 0.5, 1.0, 0.1),\n        'learning_rate': hp.quniform('learning_rate', 0.1, 0.3, 0.1),\n         'gamma': hp.choice('gamma', np.arange(0, 20, 0.5, dtype = float)),\n         'reg_alpha': hp.choice('reg_alpha', np.arange(0, 20, 0.5, dtype = float)),\n         'reg_lambda': hp.choice('reg_lambda', np.arange(0, 20, 0.5, dtype = float)),\n         \n        'objective': 'reg:squarederror',\n        \n        'eval_metric': 'rmse'}\n\ndef score(params):\n    model = XGBRegressor(**params)\n    \n    model.fit(X_train_cl, Y_train_cl, \n              eval_set = [(X_train_cl, Y_train_cl), (X_test_cl, Y_test_cl)],\n              verbose = False, \n              early_stopping_rounds = 10)\n    \n    y_pred = model.predict(X_test_cl)\n    score = np.sqrt(metrics.mean_squared_error(Y_test_cl, y_pred))\n    print(score)\n    return {'loss': score, 'status': STATUS_OK}    \n    \ndef optimize(trials, space):\n    \n    best = fmin(score, space, algo = tpe.suggest, max_evals = 1000)\n    return best\n\ntrials = Trials()\nbest_params = optimize(trials, space)\n"
In [ ]:
#space_eval(space, best_params)
In [ ]:
model_opt_cl = XGBRegressor(max_depth = 11, 
                         n_estimators = 220, 
                         learning_rate = 0.2, 
                         min_child_weight = 0, 
                         subsample = 0.8,
                         colsample_bytree = 0.7000000000000001, 
                         gamma = 16.0, 
                         reg_alpha = 18.0, 
                         reg_lambda = 12.5, 
                         objective = 'reg:squarederror')

# Fit with hp datasets
model_opt_cl.fit(X_train_cl, Y_train_cl, 
              eval_set = [(X_train_cl, Y_train_cl), (X_test_cl, Y_test_cl)], 
              eval_metric = 'rmse', 
              verbose = True, 
              early_stopping_rounds = 10)
[0]	validation_0-rmse:2898.74705	validation_1-rmse:2402.66924
[1]	validation_0-rmse:2478.55768	validation_1-rmse:2012.40785
[2]	validation_0-rmse:2155.59121	validation_1-rmse:1696.77278
[3]	validation_0-rmse:1877.15980	validation_1-rmse:1441.39662
[4]	validation_0-rmse:1668.68426	validation_1-rmse:1249.12972
[5]	validation_0-rmse:1469.99469	validation_1-rmse:1059.38439
[6]	validation_0-rmse:1323.03448	validation_1-rmse:932.19727
[7]	validation_0-rmse:1205.03525	validation_1-rmse:849.60872
[8]	validation_0-rmse:1102.52672	validation_1-rmse:754.49547
[9]	validation_0-rmse:1012.69604	validation_1-rmse:667.47603
[10]	validation_0-rmse:930.54045	validation_1-rmse:600.29617
[11]	validation_0-rmse:864.02972	validation_1-rmse:549.36893
[12]	validation_0-rmse:808.75124	validation_1-rmse:521.61344
[13]	validation_0-rmse:767.92938	validation_1-rmse:501.19660
[14]	validation_0-rmse:721.56764	validation_1-rmse:493.12230
[15]	validation_0-rmse:688.31544	validation_1-rmse:489.90241
[16]	validation_0-rmse:657.68185	validation_1-rmse:467.56675
[17]	validation_0-rmse:625.81988	validation_1-rmse:448.02196
[18]	validation_0-rmse:592.81164	validation_1-rmse:432.49434
[19]	validation_0-rmse:568.63019	validation_1-rmse:423.84901
[20]	validation_0-rmse:543.05352	validation_1-rmse:429.87242
[21]	validation_0-rmse:521.64979	validation_1-rmse:423.90150
[22]	validation_0-rmse:496.08052	validation_1-rmse:430.81457
[23]	validation_0-rmse:474.64604	validation_1-rmse:429.78588
[24]	validation_0-rmse:453.96105	validation_1-rmse:428.79756
[25]	validation_0-rmse:439.71898	validation_1-rmse:424.19240
[26]	validation_0-rmse:430.04577	validation_1-rmse:420.08393
[27]	validation_0-rmse:412.76728	validation_1-rmse:402.12516
[28]	validation_0-rmse:402.89145	validation_1-rmse:399.18282
[29]	validation_0-rmse:391.99815	validation_1-rmse:394.50311
[30]	validation_0-rmse:379.60773	validation_1-rmse:395.96154
[31]	validation_0-rmse:365.09207	validation_1-rmse:401.98717
[32]	validation_0-rmse:353.39436	validation_1-rmse:399.66503
[33]	validation_0-rmse:343.01995	validation_1-rmse:387.75501
[34]	validation_0-rmse:333.44977	validation_1-rmse:376.27544
[35]	validation_0-rmse:323.49648	validation_1-rmse:372.03908
[36]	validation_0-rmse:316.70779	validation_1-rmse:376.29892
[37]	validation_0-rmse:309.40533	validation_1-rmse:373.87829
[38]	validation_0-rmse:302.05736	validation_1-rmse:371.03768
[39]	validation_0-rmse:297.20982	validation_1-rmse:375.88598
[40]	validation_0-rmse:292.13944	validation_1-rmse:378.06687
[41]	validation_0-rmse:283.33049	validation_1-rmse:382.29843
[42]	validation_0-rmse:276.48175	validation_1-rmse:380.93831
[43]	validation_0-rmse:268.10283	validation_1-rmse:369.08756
[44]	validation_0-rmse:260.55281	validation_1-rmse:363.89012
[45]	validation_0-rmse:252.61092	validation_1-rmse:365.25588
[46]	validation_0-rmse:246.85552	validation_1-rmse:357.48156
[47]	validation_0-rmse:242.54319	validation_1-rmse:356.35162
[48]	validation_0-rmse:238.27906	validation_1-rmse:362.07778
[49]	validation_0-rmse:232.91064	validation_1-rmse:359.65328
[50]	validation_0-rmse:224.98993	validation_1-rmse:355.20659
[51]	validation_0-rmse:220.17279	validation_1-rmse:356.80272
[52]	validation_0-rmse:215.57903	validation_1-rmse:352.62934
[53]	validation_0-rmse:209.91417	validation_1-rmse:347.34857
[54]	validation_0-rmse:207.82252	validation_1-rmse:344.52832
[55]	validation_0-rmse:205.31418	validation_1-rmse:343.00369
[56]	validation_0-rmse:202.39825	validation_1-rmse:340.41040
[57]	validation_0-rmse:199.09269	validation_1-rmse:338.24101
[58]	validation_0-rmse:194.59998	validation_1-rmse:332.98531
[59]	validation_0-rmse:191.78800	validation_1-rmse:332.08976
[60]	validation_0-rmse:189.25083	validation_1-rmse:331.34210
[61]	validation_0-rmse:185.31336	validation_1-rmse:330.38685
[62]	validation_0-rmse:180.73959	validation_1-rmse:327.39467
[63]	validation_0-rmse:176.88775	validation_1-rmse:326.37211
[64]	validation_0-rmse:174.15135	validation_1-rmse:327.10629
[65]	validation_0-rmse:170.50016	validation_1-rmse:322.69359
[66]	validation_0-rmse:167.47890	validation_1-rmse:323.51637
[67]	validation_0-rmse:165.25154	validation_1-rmse:322.28677
[68]	validation_0-rmse:163.50366	validation_1-rmse:322.02675
[69]	validation_0-rmse:161.72063	validation_1-rmse:319.67808
[70]	validation_0-rmse:160.45291	validation_1-rmse:318.73098
[71]	validation_0-rmse:157.25221	validation_1-rmse:317.01869
[72]	validation_0-rmse:154.90480	validation_1-rmse:316.66077
[73]	validation_0-rmse:152.92387	validation_1-rmse:315.61698
[74]	validation_0-rmse:149.44140	validation_1-rmse:312.18477
[75]	validation_0-rmse:146.17917	validation_1-rmse:311.77227
[76]	validation_0-rmse:144.56587	validation_1-rmse:314.06823
[77]	validation_0-rmse:142.82946	validation_1-rmse:313.48809
[78]	validation_0-rmse:141.09725	validation_1-rmse:313.23675
[79]	validation_0-rmse:139.23872	validation_1-rmse:310.08778
[80]	validation_0-rmse:136.84796	validation_1-rmse:310.93924
[81]	validation_0-rmse:134.47364	validation_1-rmse:310.74198
[82]	validation_0-rmse:133.56505	validation_1-rmse:310.11792
[83]	validation_0-rmse:131.14567	validation_1-rmse:308.00468
[84]	validation_0-rmse:129.15919	validation_1-rmse:307.67320
[85]	validation_0-rmse:126.87424	validation_1-rmse:308.34739
[86]	validation_0-rmse:125.67902	validation_1-rmse:307.34380
[87]	validation_0-rmse:123.68690	validation_1-rmse:307.27525
[88]	validation_0-rmse:121.46145	validation_1-rmse:305.35852
[89]	validation_0-rmse:119.15137	validation_1-rmse:302.85580
[90]	validation_0-rmse:117.15709	validation_1-rmse:304.24827
[91]	validation_0-rmse:116.00485	validation_1-rmse:302.83994
[92]	validation_0-rmse:114.78821	validation_1-rmse:303.60118
[93]	validation_0-rmse:112.97138	validation_1-rmse:302.76955
[94]	validation_0-rmse:111.43424	validation_1-rmse:302.66890
[95]	validation_0-rmse:110.32686	validation_1-rmse:303.11177
[96]	validation_0-rmse:108.28413	validation_1-rmse:302.84626
[97]	validation_0-rmse:106.64890	validation_1-rmse:303.81004
[98]	validation_0-rmse:105.59609	validation_1-rmse:301.59268
[99]	validation_0-rmse:103.88003	validation_1-rmse:301.42634
[100]	validation_0-rmse:102.85737	validation_1-rmse:301.20262
[101]	validation_0-rmse:102.57752	validation_1-rmse:301.03622
[102]	validation_0-rmse:101.10670	validation_1-rmse:299.96238
[103]	validation_0-rmse:99.40097	validation_1-rmse:300.36662
[104]	validation_0-rmse:98.76311	validation_1-rmse:300.27979
[105]	validation_0-rmse:97.56816	validation_1-rmse:300.09801
[106]	validation_0-rmse:96.93694	validation_1-rmse:300.04814
[107]	validation_0-rmse:95.67288	validation_1-rmse:299.68689
[108]	validation_0-rmse:93.98050	validation_1-rmse:298.47387
[109]	validation_0-rmse:93.20700	validation_1-rmse:299.35917
[110]	validation_0-rmse:91.89550	validation_1-rmse:300.01296
[111]	validation_0-rmse:90.56063	validation_1-rmse:299.94362
[112]	validation_0-rmse:89.25272	validation_1-rmse:300.41098
[113]	validation_0-rmse:88.42727	validation_1-rmse:301.40591
[114]	validation_0-rmse:87.01303	validation_1-rmse:300.59831
[115]	validation_0-rmse:86.44569	validation_1-rmse:299.29989
[116]	validation_0-rmse:85.84067	validation_1-rmse:299.66269
[117]	validation_0-rmse:84.56557	validation_1-rmse:299.86318
Out[ ]:
XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.7000000000000001, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=16.0, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.2, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=11, max_leaves=None,
             min_child_weight=0, missing=nan, monotone_constraints=None,
             n_estimators=220, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=None, ...)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.7000000000000001, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=16.0, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.2, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=11, max_leaves=None,
             min_child_weight=0, missing=nan, monotone_constraints=None,
             n_estimators=220, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=None, ...)
In [ ]:
y_pred_1_opt = model_opt_cl.predict(X_test_cl)
In [ ]:
y_line = np.arange(int(Y_test_cl.min()) - 10, int(Y_test_cl.max()) + 10)
plt.figure(figsize = (14, 10))
ax = plt.axes()
plt.style.use('dark_background')
plt.plot(y_line, y_line, 'k--', lw = 1, label = 'Perfect Fit',color='w')
sns.scatterplot(x=y_pred_1_opt, y=Y_test_cl, s = 100,hue=Y_test_cl,palette='magma',markers=True)
plt.xlabel('Predicted Current Values (mC)', fontsize = 20, labelpad = 15)
plt.ylabel('True Current Values (mC)', fontsize = 20, labelpad = 15)
plt.title('XGB Optimized RAC Charge Predictions', fontsize = 22, c = 'w', pad = 20)
plt.legend(fontsize = 15)
plt.tick_params(labelsize = 15)
plt.show()
No description has been provided for this image
In [ ]:
residuals_1_opt = Y_test_cl - y_pred_1_opt
residuals_1_opt
Out[ ]:
array([ 155.98128906,  -32.625     , -778.40673828, -305.86445312,
        -60.76956299, -372.54248047,   83.86730957,  -53.58642578,
        381.43772461, -482.70263672,   51.51757812,   21.42285156,
        222.20947266,  666.61132812,   41.09887695, -303.66667969,
        -74.58642578,  -79.16162109,  -38.83007812, -286.76689453,
         62.78588867,  241.71806152, -211.04477051])
In [ ]:
plt.figure(figsize = (12, 10))
ax = plt.axes()
sns.histplot(residuals_1_opt, bins = 15, alpha = 0.5,kde=True,color = 'r')
plt.xlabel('Residual Values (mC)', fontsize = 20, labelpad = 15)
plt.ylabel('Count', fontsize = 20, labelpad = 15)
plt.title('Histogram of Error Values (RMSE)', fontsize = 22, c = 'w', pad = 20)
plt.tick_params(labelsize = 15)
plt.legend(fontsize = 12)
plt.style.use('bmh')
plt.show()
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No description has been provided for this image
In [ ]:
from sklearn.metrics import r2_score
yhat=model_opt_cl.predict(X_train_cl)
r_squared = r2_score(Y_train_cl,yhat)
print(r_squared)
yhat=model_opt_cl.predict(X_test_cl)
r_squared = r2_score(Y_test_cl,yhat)
print(r_squared)
0.9963958441783672
0.9540851277593169
In [ ]:
from skopt import BayesSearchCV
#Extreme Gradient Boosting Regressor Function Definition
from skopt.space import Real, Integer
#After Selection of Best Model using Bayes Search Optimizer to reduce overfitting
def bayes_xgb(X_train, Y_train):
   optimization = BayesSearchCV(
               XGBRegressor(),
         {
         'n_estimators' : Integer(50,800),
         'max_depth' :Integer(1,7),
         'learning_rate' : Real(0.01,1),
         'subsample' : Real(0.5,1),
         'colsample_bytree' : Real(0.5,1),
         'reg_alpha' : Real(0,20),
         'min_split_loss' : Integer(0,20),
         'min_child_weight' : Integer(0,20),
         'reg_lambda' : Real(0,20),
         'max_delta_step' : Integer(1,20)
      },
      n_iter=100,
      random_state=111
   )
   np.int = int
   # executes bayesian optimization
   _ = optimization.fit(X_train, Y_train)
   return _
In [ ]:
#Extreme Gradient Boosting Regressor Function Definition
n_estimators = np.arange(50,800,50)
max_depth = np.arange(1,7,1)
learning_rate = np.logspace(0.01,1,10)
subsample = np.arange(0,1,0.1)
colsample_bytree = np.arange(0,1,0.1)
reg_alpha = np.arange(0,10,1)
min_split_loss = np.arange(0,10,1)
min_child_weight = np.arange(0,10,1)
reg_lambda = np.arange(0,10,1)
#sampling_method = ['uniform','gradient_based']
max_delta_step = np.arange(1,10,1)
def xgb(X_train,Y_train):
    parameters = {'n_estimators': n_estimators,
    'max_depth': max_depth,
    'learning_rate': learning_rate,
    'subsample': subsample,
    'colsample_bytree': colsample_bytree,
    'reg_alpha': reg_alpha,
    'min_split_loss':min_split_loss,
    'min_child_weight':min_child_weight,
    'reg_lambda':reg_lambda,
    #'sampling_method':sampling_method,
    #'max_delta_step':max_delta_step,
    'gpu_id': [0]
    }
    xgb = XGBRegressor()
    xgb_cv = GridSearchCV(xgb,parameters,cv=5)
    xgb_cv.fit(X=X_train,y=Y_train)
    return xgb_cv
In [ ]:
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error
In [ ]:
XGB_detailed = XGBRegressor(n_estimators=601,max_depth=6,learning_rate=0.4457817974015126,subsample=0.9788734713427625,colsample_bytree=0.6666273891320196,max_delta_step=3,reg_alpha=5.043999744632456,min_child_weight=1,min_split_loss=7,reg_lambda=6.499667302663611)
In [ ]:
XGB_Acc = []
def XGB_fit(DT,X_train,Y_train,X_test,Y_test):
    DT.fit(X_train,Y_train)
    yhat = DT.predict(X_test)
    r_squared = r2_score(Y_test, yhat)
    mse = mean_squared_error(Y_test, yhat)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(Y_test, yhat)
    DT_row=[round(r_squared,3),round(mse,3),round(rmse,3),round(mae,3)]
    XGB_Acc.append(DT_row)
In [ ]:
XGB_fit(XGB_detailed,X_train_det,Y_train_det,X_test_det,Y_test_det)
In [ ]:
#Extreme Gradient Boosting Random Forest Regressor Function Definition
def xgbrf(X_train,Y_train):
    parameters = {'n_estimators': [50, 100, 200,500],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.1, 0.3],
    'subsample': [0.8, 1.0],
    'reg_lambda': [0, 0.1, 0.3],
    'reg_alpha': [0, 0.1, 0.3]}
    xgbrf = XGBRFRegressor()
    xgbrf_cv = GridSearchCV(xgbrf,parameters,cv=3)
    xgbrf_cv.fit(X=X_train,y=Y_train)
    print("tuned hyperparameters :(best parameters) ",xgbrf_cv.best_params_)
    print("accuracy :",xgbrf_cv.best_score_)
    
In [ ]:
'''
xgbrf(X_train_det,Y_train_det)
xgbrf(X_train_cs,Y_train_cs)
xgbrf(X_train_c,Y_train_c)
xgbrf(X_train_cl,Y_train_cl)
xgbrf(X_train_sf,Y_train_sf)
'''
Out[ ]:
'\nxgbrf(X_train_det,Y_train_det)\nxgbrf(X_train_cs,Y_train_cs)\nxgbrf(X_train_c,Y_train_c)\nxgbrf(X_train_cl,Y_train_cl)\nxgbrf(X_train_sf,Y_train_sf)\n'

Hyper Parameter Visualisations¶

In [ ]:
hp_vis = pd.read_csv("Reference Tables.csv")
In [ ]:
hp_vis
Out[ ]:
Algorithm Accuracy Metric RAC Strength NAC Strength RAC Carbonization RAC Chloride Ion Erosion RAC Sulfate Corrosion
0 Decision Trees R Squared 76.000 99.870 76.500 51.000 60.000
1 Decision Trees MSE 55.250 0.031 12.600 91787.000 0.010
2 Decision Trees RMSE 6.600 0.177 3.550 1044.000 0.100
3 Decision Trees MAE 5.000 0.104 2.483 584.000 0.061
4 Random Forest R Squared 83.400 99.870 86.000 76.800 86.500
5 Random Forest MSE 42.350 0.071 7.400 85718.000 0.003
6 Random Forest RMSE 6.506 0.267 2.720 621.000 0.057
7 Random Forest MAE 4.465 0.079 1.948 439.000 0.033
8 AdaBoost R Squared 70.100 99.900 71.600 68.800 81.900
9 AdaBoost MSE 76.930 0.220 14.489 12575.000 0.004
10 AdaBoost RMSE 8.771 0.469 3.806 615.000 0.063
11 AdaBoost MAE 7.029 0.294 3.073 516.128 0.043
12 GBDT R Squared 86.100 99.970 94.500 86.500 86.200
13 GBDT MSE 35.820 0.050 3.207 35718.000 0.003
14 GBDT RMSE 5.985 0.224 1.791 485.508 0.058
15 GBDT MAE 4.093 0.086 1.236 332.121 0.041
16 Historical GBDT R Squared 83.000 99.990 92.900 86.600 74.900
17 Historical GBDT MSE 40.976 0.164 4.020 45127.000 0.005
18 Historical GBDT RMSE 6.401 0.405 2.005 495.000 0.074
19 Historical GBDT MAE 4.486 0.140 1.286 375.782 0.049
20 XGBoost R Squared 92.100 99.998 93.900 86.300 80.300
21 XGBoost MSE 21.212 0.118 3.867 33844.000 0.004
22 XGBoost RMSE 4.606 0.344 1.967 483.574 0.066
23 XGBoost MAE 3.764 0.140 1.337 349.100 0.040
24 XGBoost RF R Squared 83.300 99.979 86.400 75.700 85.000
25 XGBoost RF MSE 42.550 0.083 7.266 13130.000 0.003
26 XGBoost RF RMSE 6.523 0.289 2.680 634.000 0.058
27 XGBoost RF MAE 4.506 0.084 1.860 457.000 0.033
In [ ]:
plt.figure(figsize=(10, 6))
sns.barplot(data=hp_vis, x='Accuracy Metric', y='RAC Strength', hue='Algorithm')
plt.xlabel('Accuracy Metric')
plt.ylabel('RAC Strength')
plt.title('RAC Strength Accuracy Metrics')
plt.legend(title='Algorithm', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
plt.figure(figsize=(10, 6))
sns.barplot(data=hp_vis, x='Accuracy Metric', y='NAC Strength', hue='Algorithm')
plt.xlabel('Accuracy Metric')
plt.ylabel('NAC Strength')
plt.title('NAC Strength Accuracy Metrics')
plt.legend(title='Algorithm', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
plt.figure(figsize=(10, 6))
sns.barplot(data=hp_vis, x='Accuracy Metric', y='RAC Carbonization', hue='Algorithm')

plt.xlabel('Accuracy Metric')
plt.ylabel('RAC Carbonization')
plt.title('RAC Carbonization Accuracy Metrics')
plt.legend(title='Algorithm', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
plt.figure(figsize=(10, 6))
sns.barplot(data=hp_vis, x='Accuracy Metric', y='RAC Chloride Ion Erosion', hue='Algorithm')

plt.xlabel('Accuracy Metric')
plt.ylabel('RAC Chloride Ion Erosion')
plt.yscale('log') 
plt.title('RAC Chloride Ion Erosion Accuracy Metrics')
plt.legend(title='Algorithm', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
plt.figure(figsize=(10, 6))
sns.barplot(data=hp_vis, x='Accuracy Metric', y='RAC Sulfate Corrosion', hue='Algorithm')

plt.xlabel('Accuracy Metric')
plt.ylabel('RAC Sulfate Corrosion')
plt.yscale('log') 
plt.title('RAC Sulfate Corrosion Accuracy Metrics')
plt.legend(title='Algorithm', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()
No description has been provided for this image

Model Training And Fitting Based On Optimized Hyper Parameters¶

In [ ]:
DT_detailed = DecisionTreeRegressor(criterion='squared_error',max_depth=5,max_features=100,min_samples_leaf=1,min_samples_split=2)
DT_cs = DecisionTreeRegressor(criterion='squared_error',max_depth=10,max_features=500,min_samples_leaf=1,min_samples_split=2)
DT_c= DecisionTreeRegressor(criterion='squared_error',max_depth=10,max_features=1000,min_samples_leaf=2,min_samples_split=2)
DT_cl = DecisionTreeRegressor(criterion='squared_error',max_depth=7,max_features=1000,min_samples_leaf=2,min_samples_split=2)
DT_sf = DecisionTreeRegressor(criterion='squared_error',max_depth=5,max_features=500,min_samples_leaf=1,min_samples_split=5)
In [ ]:
DT_Acc = []
def dt_fit(DT,X_train,Y_train,X_test,Y_test):
    DT.fit(X_train,Y_train)
    yhat = DT.predict(X_test)
    r_squared = r2_score(Y_test, yhat)
    mse = mean_squared_error(Y_test, yhat)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(Y_test, yhat)
    DT_row=[round(r_squared,3),round(mse,3),round(rmse,3),round(mae,3)]
    DT_Acc.append(DT_row)
In [ ]:
dt_fit(DT_detailed,X_train_det,Y_train_det,X_test_det,Y_test_det)
dt_fit(DT_cs,X_train_cs,Y_train_cs,X_test_cs,Y_test_cs)
dt_fit(DT_c,X_train_c,Y_train_c,X_test_c,Y_test_c)
dt_fit(DT_cl,X_train_cl,Y_train_cl,X_test_cl,Y_test_cl)
dt_fit(DT_sf,X_train_sf,Y_train_sf,X_test_sf,Y_test_sf)
In [ ]:
DT_Acc
Out[ ]:
[[0.551, 66.483, 8.154, 6.611],
 [1.0, 0.132, 0.363, 0.139],
 [0.762, 11.1, 3.332, 2.451],
 [0.66, 660358.524, 812.624, 539.447],
 [0.788, 0.013, 0.113, 0.084]]
In [ ]:
RF_detailed = RandomForestRegressor(bootstrap=True,max_depth=9,max_samples=0.9,min_samples_leaf=1,min_samples_split=2,n_estimators=300)
RF_cs = RandomForestRegressor(bootstrap=True,max_depth=9,max_samples=0.9,min_samples_leaf=1,min_samples_split=2,n_estimators=300)
RF_c= RandomForestRegressor(bootstrap=True,max_depth=9,max_samples=0.9,min_samples_leaf=1,min_samples_split=2,n_estimators=100)
RF_cl = RandomForestRegressor(bootstrap=True,max_depth=9,max_samples=0.7,min_samples_leaf=1,min_samples_split=2,n_estimators=100)
RF_sf = RandomForestRegressor(bootstrap=True,max_depth=7,max_samples=0.9,min_samples_leaf=1,min_samples_split=2,n_estimators=300)
In [ ]:
RF_Acc = []
def rf_fit(DT,X_train,Y_train,X_test,Y_test):
    DT.fit(X_train,Y_train)
    yhat = DT.predict(X_test)
    r_squared = r2_score(Y_test, yhat)
    mse = mean_squared_error(Y_test, yhat)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(Y_test, yhat)
    DT_row=[round(r_squared,3),round(mse,3),round(rmse,3),round(mae,3)]
    RF_Acc.append(DT_row)
In [ ]:
rf_fit(RF_detailed,X_train_det,Y_train_det,X_test_det,Y_test_det)
rf_fit(RF_cs,X_train_cs,Y_train_cs,X_test_cs,Y_test_cs)
rf_fit(RF_c,X_train_c,Y_train_c,X_test_c,Y_test_c)
rf_fit(RF_cl,X_train_cl,Y_train_cl,X_test_cl,Y_test_cl)
rf_fit(RF_sf,X_train_sf,Y_train_sf,X_test_sf,Y_test_sf)
In [ ]:
RF_Acc
Out[ ]:
[[0.878, 18.02, 4.245, 3.479],
 [1.0, 0.093, 0.306, 0.087],
 [0.844, 7.286, 2.699, 1.845],
 [0.874, 243738.176, 493.698, 410.485],
 [0.84, 0.01, 0.099, 0.069]]
In [ ]:
ada_detailed = AdaBoostRegressor(learning_rate=1,loss='square',n_estimators=50)
ada_cs = AdaBoostRegressor(learning_rate=1,loss='square',n_estimators=500)
ada_c= AdaBoostRegressor(learning_rate=0.1,loss='square',n_estimators=500)
ada_cl = AdaBoostRegressor(learning_rate=1,loss='square',n_estimators=1000)
ada_sf = AdaBoostRegressor(learning_rate=0.01,loss='square',n_estimators=100)
In [ ]:
ada_Acc = []
def ada_fit(DT,X_train,Y_train,X_test,Y_test):
    DT.fit(X_train,Y_train)
    yhat = DT.predict(X_test)
    r_squared = r2_score(Y_test, yhat)
    mse = mean_squared_error(Y_test, yhat)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(Y_test, yhat)
    DT_row=[round(r_squared,3),round(mse,3),round(rmse,3),round(mae,3)]
    ada_Acc.append(DT_row)
In [ ]:
ada_fit(ada_detailed,X_train_det,Y_train_det,X_test_det,Y_test_det)
ada_fit(ada_cs,X_train_cs,Y_train_cs,X_test_cs,Y_test_cs)
ada_fit(ada_c,X_train_c,Y_train_c,X_test_c,Y_test_c)
ada_fit(ada_cl,X_train_cl,Y_train_cl,X_test_cl,Y_test_cl)
ada_fit(ada_sf,X_train_sf,Y_train_sf,X_test_sf,Y_test_sf)
In [ ]:
ada_Acc
Out[ ]:
[[0.748, 37.41, 6.116, 5.249],
 [0.999, 0.363, 0.603, 0.424],
 [0.675, 15.136, 3.89, 3.172],
 [0.768, 450030.828, 670.843, 559.991],
 [0.765, 0.014, 0.119, 0.09]]
In [ ]:
GBDT_detailed = GradientBoostingRegressor(criterion='friedman_mse',learning_rate=0.075,loss='squared_error',max_depth=3,n_estimators=231,subsample=0.9)
GBDT_cs = GradientBoostingRegressor(criterion='friedman_mse',learning_rate=0.15,loss='squared_error',max_depth=5,n_estimators=100,subsample=0.8)
GBDT_c= GradientBoostingRegressor(criterion='friedman_mse',learning_rate=0.15,loss='squared_error',max_depth=3,n_estimators=231,subsample=0.8)
GBDT_cl = GradientBoostingRegressor(criterion='friedman_mse',learning_rate=0.2,loss='squared_error',max_depth=3,n_estimators=231,subsample=0.8)
GBDT_sf = GradientBoostingRegressor(criterion='friedman_mse',learning_rate=0.2,loss='squared_error',max_depth=3,n_estimators=50,subsample=0.9)
In [ ]:
GBDT_Acc = []
def GBDT_fit(DT,X_train,Y_train,X_test,Y_test):
    DT.fit(X_train,Y_train)
    yhat = DT.predict(X_test)
    r_squared = r2_score(Y_test, yhat)
    mse = mean_squared_error(Y_test, yhat)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(Y_test, yhat)
    DT_row=[round(r_squared,3),round(mse,3),round(rmse,3),round(mae,3)]
    GBDT_Acc.append(DT_row)
In [ ]:
GBDT_fit(GBDT_detailed,X_train_det,Y_train_det,X_test_det,Y_test_det)
GBDT_fit(GBDT_cs,X_train_cs,Y_train_cs,X_test_cs,Y_test_cs)
GBDT_fit(GBDT_c,X_train_c,Y_train_c,X_test_c,Y_test_c)
GBDT_fit(GBDT_cl,X_train_cl,Y_train_cl,X_test_cl,Y_test_cl)
GBDT_fit(GBDT_sf,X_train_sf,Y_train_sf,X_test_sf,Y_test_sf)
In [ ]:
GBDT_Acc
Out[ ]:
[[0.935, 9.624, 3.102, 2.37],
 [1.0, 0.043, 0.207, 0.079],
 [0.949, 2.388, 1.545, 1.162],
 [0.947, 103517.732, 321.742, 236.17],
 [0.88, 0.007, 0.085, 0.059]]
In [ ]:
histR_detailed = HistGradientBoostingRegressor(learning_rate=0.1,loss='squared_error',max_depth=4,max_iter=200,min_samples_leaf=5)
histR_cs = HistGradientBoostingRegressor(learning_rate=0.1,loss='squared_error',max_depth=5,max_iter=100,min_samples_leaf=5)
histR_c= HistGradientBoostingRegressor(learning_rate=0.1,loss='squared_error',max_depth=3,max_iter=500,min_samples_leaf=5)
histR_cl = HistGradientBoostingRegressor(learning_rate=0.2,loss='squared_error',max_depth=3,max_iter=200,min_samples_leaf=5)
histR_sf = HistGradientBoostingRegressor(learning_rate=0.2,loss='squared_error',max_depth=4,max_iter=500,min_samples_leaf=10)
In [ ]:
histR_Acc = []
def histR_fit(DT,X_train,Y_train,X_test,Y_test):
    DT.fit(X_train,Y_train)
    yhat = DT.predict(X_test)
    r_squared = r2_score(Y_test, yhat)
    mse = mean_squared_error(Y_test, yhat)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(Y_test, yhat)
    DT_row=[round(r_squared,3),round(mse,3),round(rmse,3),round(mae,3)]
    histR_Acc.append(DT_row)
In [ ]:
histR_fit(histR_detailed,X_train_det,Y_train_det,X_test_det,Y_test_det)
histR_fit(histR_cs,X_train_cs,Y_train_cs,X_test_cs,Y_test_cs)
histR_fit(histR_c,X_train_c,Y_train_c,X_test_c,Y_test_c)
histR_fit(histR_cl,X_train_cl,Y_train_cl,X_test_cl,Y_test_cl)
histR_fit(histR_sf,X_train_sf,Y_train_sf,X_test_sf,Y_test_sf)
In [ ]:
histR_Acc
Out[ ]:
[[0.931, 10.267, 3.204, 2.362],
 [0.999, 0.178, 0.421, 0.154],
 [0.949, 2.374, 1.541, 1.143],
 [0.966, 65795.204, 256.506, 199.023],
 [0.897, 0.006, 0.079, 0.055]]
In [ ]:
XGB_detailed = XGBRegressor(n_estimators=200,max_depth=5,learning_rate=0.1,subsample=0.9,colsample_bytree=0.8,gamma=0.5,reg_alpha=0.3)
XGB_cs = XGBRegressor(n_estimators=100,max_depth=5,learning_rate=0.1,subsample=1,colsample_bytree=1,gamma=0,reg_alpha=0.1)
XGB_c= XGBRegressor(n_estimators=200,max_depth=3,learning_rate=0.2,subsample=0.9,colsample_bytree=1,gamma=0.1,reg_alpha=0.3)
XGB_cl = XGBRegressor(n_estimators=207,max_depth=3,learning_rate=0.1,subsample=0.8,colsample_bytree=0.8,gamma=0,reg_alpha=0)
XGB_sf = XGBRegressor(n_estimators=200,max_depth=5,learning_rate=0.2,subsample=0.8,colsample_bytree=1,gamma=0,reg_alpha=0.1)
In [ ]:
XGB_Acc = []
def XGB_fit(DT,X_train,Y_train,X_test,Y_test):
    DT.fit(X_train,Y_train)
    yhat = DT.predict(X_test)
    r_squared = r2_score(Y_test, yhat)
    mse = mean_squared_error(Y_test, yhat)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(Y_test, yhat)
    DT_row=[round(r_squared,3),round(mse,3),round(rmse,3),round(mae,3)]
    XGB_Acc.append(DT_row)
In [ ]:
XGB_fit(XGB_detailed,X_train_det,Y_train_det,X_test_det,Y_test_det)
XGB_fit(XGB_cs,X_train_cs,Y_train_cs,X_test_cs,Y_test_cs)
XGB_fit(XGB_c,X_train_c,Y_train_c,X_test_c,Y_test_c)
XGB_fit(XGB_cl,X_train_cl,Y_train_cl,X_test_cl,Y_test_cl)
XGB_fit(XGB_sf,X_train_sf,Y_train_sf,X_test_sf,Y_test_sf)
In [ ]:
XGB_Acc
Out[ ]:
[[0.942, 8.666, 2.944, 2.103],
 [1.0, 0.057, 0.239, 0.134],
 [0.948, 2.428, 1.558, 1.154],
 [0.961, 75325.804, 274.455, 223.956],
 [0.884, 0.007, 0.084, 0.057]]
In [ ]:
XGBRF_detailed = XGBRFRegressor(learning_rate=0.3,max_depth=7,n_estimators=50,subsample=1,reg_lambda=0,reg_alpha=0)
XGBRF_cs = XGBRFRegressor(learning_rate=0.3,max_depth=7,n_estimators=50,subsample=1,reg_lambda=0,reg_alpha=0)
XGBRF_c= XGBRFRegressor(learning_rate=0.3,max_depth=7,n_estimators=100,subsample=1,reg_lambda=0,reg_alpha=0.1)
XGBRF_cl = XGBRFRegressor(learning_rate=0.3,max_depth=7,n_estimators=500,subsample=0.8,reg_lambda=0,reg_alpha=0.3)
XGBRF_sf = XGBRFRegressor(learning_rate=0.3,max_depth=7,n_estimators=50,subsample=1,reg_lambda=0,reg_alpha=0)
In [ ]:
XGBRF_Acc = []
def XGBRF_fit(DT,X_train,Y_train,X_test,Y_test):
    DT.fit(X_train,Y_train)
    yhat = DT.predict(X_test)
    r_squared = r2_score(Y_test, yhat)
    mse = mean_squared_error(Y_test, yhat)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(Y_test, yhat)
    DT_row=[round(r_squared,3),round(mse,3),round(rmse,3),round(mae,3)]
    XGBRF_Acc.append(DT_row)
In [ ]:
XGBRF_fit(RF_detailed,X_train_det,Y_train_det,X_test_det,Y_test_det)
XGBRF_fit(RF_cs,X_train_cs,Y_train_cs,X_test_cs,Y_test_cs)
XGBRF_fit(RF_c,X_train_c,Y_train_c,X_test_c,Y_test_c)
XGBRF_fit(RF_cl,X_train_cl,Y_train_cl,X_test_cl,Y_test_cl)
XGBRF_fit(RF_sf,X_train_sf,Y_train_sf,X_test_sf,Y_test_sf)
In [ ]:
XGBRF_Acc
Out[ ]:
[[0.891, 16.224, 4.028, 3.295],
 [1.0, 0.104, 0.322, 0.092],
 [0.856, 6.697, 2.588, 1.86],
 [0.834, 321240.092, 566.78, 443.317],
 [0.839, 0.01, 0.099, 0.07]]

Residual Analysis Based On Best Model (According to Accuracy Metrics)¶

In [ ]:
yhat=model_opt_det.predict(X_train_det)
r_squared = r2_score(Y_train_det,yhat)
r_squared
Out[ ]:
0.9904442412981427
In [ ]:
yhat =model_opt_det.predict(X_test_det)
r_squared = r2_score(Y_test_det, yhat)
r_squared
Out[ ]:
0.9586735753743867
In [ ]:
detailed_predicted = model_opt_det.predict(X_train_det)
plt.figure(figsize=(20, 6))
plt.plot(detailed_predicted, color='red', label='Actual')
plt.plot(Y_train_det, color='blue', label='Predicted')
plt.xlabel('Sample')
plt.ylabel('Values')
plt.title('Predicted vs Actual Values Of RAC Strength Prediction on Train Set')
plt.legend()
plt.show()
No description has been provided for this image
In [ ]:
detailed_predicted = model_opt_det.predict(X_test_det)
plt.figure(figsize=(20, 6))
plt.plot(detailed_predicted, color='red', label='Actual')
plt.plot(Y_test_det, color='blue', label='Predicted')
plt.xlabel('Sample')
plt.ylabel('Values')
plt.title('Predicted vs Actual Values Of RAC Strength Prediction on Test Set')
plt.legend()
plt.show()
No description has been provided for this image
In [ ]:
yhat =model_opt_cs.predict(X_test_cs)
r_squared = r2_score(Y_test_cs, yhat)
r_squared
Out[ ]:
0.9997071870881837
In [ ]:
cs_predicted = model_opt_cs.predict(X_test_cs)
In [ ]:
plt.figure(figsize=(20, 6))
plt.plot(cs_predicted, color='red', label='Actual',linewidth=3)
plt.plot(Y_test_cs, color='blue', label='Predicted')
plt.xlabel('Sample')
plt.ylabel('Values')
plt.title('Predicted vs Actual Values Of NAC Strength Prediction')
plt.legend()
plt.show()
No description has been provided for this image
In [ ]:
yhat =model_opt_c.predict(X_test_c)
r_squared = r2_score(Y_test_c, yhat)
r_squared
Out[ ]:
0.9316704193603992
In [ ]:
c_predicted = model_opt_c.predict(X_train_c)

plt.figure(figsize=(20, 6))
plt.plot(c_predicted, color='red', label='Actual',linewidth=1)
plt.plot(Y_train_c, color='blue', label='Predicted')
plt.xlabel('Sample')
plt.ylabel('Values')
plt.title('Predicted vs Actual Values Of RAC Carbonization Depth On Train Set')
plt.legend()
plt.show()
No description has been provided for this image
In [ ]:
c_predicted = model_opt_c.predict(X_test_c)

plt.figure(figsize=(20, 6))
plt.plot(c_predicted, color='red', label='Actual',linewidth=1)
plt.plot(Y_test_c, color='blue', label='Predicted')
plt.xlabel('Sample')
plt.ylabel('Values')
plt.title('Predicted vs Actual Values Of RAC Carbonization Depth On Test Set')
plt.legend()
plt.show()
No description has been provided for this image
In [ ]:
yhat =model_opt_cl.predict(X_train_cl)
r_squared = r2_score(Y_train_cl, yhat)
print(r_squared)
cl_predicted = model_opt_cl.predict(X_train_cl)

plt.figure(figsize=(20, 6))
plt.plot(cl_predicted, color='red', label='Actual')
plt.plot(Y_train_cl, color='blue', label='Predicted')
plt.xlabel('Sample')
plt.ylabel('Values')
plt.title('Predicted vs Actual Values Of RAC Chloride Ion Erosion On Training Set')
plt.legend()
plt.show()
0.9963958441783672
No description has been provided for this image
In [ ]:
yhat =model_opt_cl.predict(X_test_cl)
r_squared = r2_score(Y_test_cl, yhat)
r_squared
Out[ ]:
0.9540851277593169
In [ ]:
cl_predicted = model_opt_cl.predict(X_test_cl)

plt.figure(figsize=(20, 6))
plt.plot(cl_predicted, color='red', label='Actual',linewidth=1)
plt.plot(Y_test_cl, color='blue', label='Predicted')
plt.xlabel('Sample')
plt.ylabel('Values')
plt.title('Predicted vs Actual Values Of RAC Chloride Ion Erosion On Test Set')
plt.legend()
plt.show()
No description has been provided for this image
In [ ]:
yhat =model_opt_sf.predict(X_train_sf)
r_squared = r2_score(Y_train_sf, yhat)
print(r_squared)
sf_predicted = model_opt_sf.predict(X_train_sf)

plt.figure(figsize=(20, 6))
plt.plot(sf_predicted, color='red', label='Actual',linewidth=1)
plt.plot(Y_train_sf, color='blue', label='Predicted')
plt.xlabel('Sample')
plt.ylabel('Values')
plt.title('Predicted vs Actual Values Of RAC Sulfate Corrosion On Training Set')
plt.legend()
plt.show()
0.9947415003055488
No description has been provided for this image
In [ ]:
yhat =model_opt_sf.predict(X_test_sf)
r_squared = r2_score(Y_test_sf, yhat)
r_squared
Out[ ]:
0.8775381393119359
In [ ]:
sf_predicted = model_opt_sf.predict(X_test_sf)

plt.figure(figsize=(20, 6))
plt.plot(sf_predicted, color='red', label='Actual',linewidth=1)
plt.plot(Y_test_sf, color='blue', label='Predicted')
plt.xlabel('Sample')
plt.ylabel('Values')
plt.title('Predicted vs Actual Values Of RAC Sulfate Corrosion On Test Set')
plt.legend()
plt.show()
No description has been provided for this image

Exporting Trained Models For Future Use Of Predictions¶

In [ ]:
test = [[165,370,650,850.5,364.5,2.22,20,2400,4.9]]
test1 = transform_X_detailed.fit_transform(test)
result=model_opt_det.predict(test1)
print(result)
[41.56649]
In [ ]:
import joblib
In [ ]:
joblib.dump(model_opt_det, 'RAC Strength Prediction.pkl')
joblib.dump(model_opt_cs,'NAC Strength Prediction.pkl')
joblib.dump(model_opt_cl,'RAC Chloride Ion Prediction.pkl')
joblib.dump(model_opt_c,'RAC Carbonization Prediction.pkl')
joblib.dump(model_opt_sf,'RAC Sulfate Corrosion Prediction.pkl')
Out[ ]:
['RAC Sulfate Corrosion Prediction.pkl']
In [ ]:
joblib.dump(transform_X_detailed, 'transform_X_detailed.pkl')
joblib.dump(transform_X_cs,'transform_X_cs.pkl')
joblib.dump(transform_X_chloride,'transform_X_chloride.pkl')
joblib.dump(transform_X_carbonization,'transform_X_carbonization.pkl')
joblib.dump(transform_X_sulfate,'transform_X_sulfate.pkl')
Out[ ]:
['transform_X_detailed.pkl']